sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
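
# A quick sketch of these builders in action (assuming the default dialect): build_mod
# is registered under "MOD" in Parser.FUNCTIONS below, so binary operands come back
# parenthesized when the tree is re-generated:
#
#     import sqlglot
#     sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()
#     # -> 'SELECT (a + 1) % 7'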


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
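
    # A minimal standalone usage sketch, using only names defined in this module
    # (default dialect assumed):
    #
    #     tokens = Tokenizer().tokenize("SELECT 1")
    #     Parser(error_level=ErrorLevel.RAISE).parse(tokens, sql="SELECT 1")
    #     # -> a list with one exp.Select tree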

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
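
    # Dialects customize this table by subclassing; a hypothetical override might look
    # like the following sketch (MY_FUNC is an invented name, not part of sqlglot):
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "MY_FUNC": lambda args: exp.Anonymous(this="MY_FUNC", expressions=args),
    #         }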

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }
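
    # These token sets feed _parse_types; e.g. SIGNED_TO_UNSIGNED_TYPE_TOKEN is what
    # lets a MySQL-style UNSIGNED suffix flip the base type token (hedged sketch):
    #
    #     sqlglot.parse_one("CREATE TABLE t (c INT UNSIGNED)", read="mysql")
    #     # the column's type resolves to exp.DataType.Type.UINT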

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
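
    # Because many keywords appear in ID_VAR_TOKENS, they can still be used as plain
    # identifiers; the alias sets above then subtract the tokens that would make a
    # clause ambiguous in that position. For example (default dialect):
    #
    #     sqlglot.parse_one("SELECT 1 AS pivot").sql()
    #     # -> 'SELECT 1 AS pivot'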

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
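
    # The single-token tables above (EQUALITY, COMPARISON, BITWISE, TERM, FACTOR, ...)
    # roughly form the binary-operator precedence ladder, so 'a + b * c' nests the
    # multiplication under the addition. A sketch of the resulting shape:
    #
    #     sqlglot.parse_one("a + b * c")
    #     # -> exp.Add(this=Column(a), expression=exp.Mul(this=Column(b), expression=Column(c)))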

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
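
    # EXPRESSION_PARSERS is the dispatch table behind parse_into (defined further down);
    # e.g. parsing a fragment straight into a WHERE clause (sketch, default dialect):
    #
    #     tokens = Tokenizer().tokenize("WHERE x > 1")
    #     Parser().parse_into(exp.Where, tokens)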

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }
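
    # PLACEHOLDER_PARSERS covers bind-parameter syntax; e.g. the COLON branch turns
    # ':name' into exp.Placeholder(this='name') when an identifier-like token follows
    # (hedged sketch, default dialect):
    #
    #     sqlglot.parse_one("SELECT :name")
    #     # -> Select(expressions=[Placeholder(this=name)])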

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
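
    # RANGE_PARSERS handles infix predicates that sit above bitwise precedence; e.g.
    # BETWEEN routes through _parse_between (sketch of the resulting shape):
    #
    #     sqlglot.parse_one("x BETWEEN 1 AND 10")
    #     # -> exp.Between(this=Column(x), low=Literal(1), high=Literal(10))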

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
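
    # PROPERTY_PARSERS fires while parsing DDL property lists; e.g. an ENGINE clause
    # becomes an exp.EngineProperty (ClickHouse-style, hedged sketch):
    #
    #     sqlglot.parse_one("CREATE TABLE t (x Int8) ENGINE=MergeTree", read="clickhouse")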

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }
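
    # CONSTRAINT_PARSERS drives column and schema constraints, e.g. (default dialect):
    #
    #     sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL PRIMARY KEY)")
    #     # id's column definition carries NotNullColumnConstraint and
    #     # PrimaryKeyColumnConstraint nodes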

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self.expression(
            exp.XMLElement,
            this=self._match_text_seq("NAME") and self._parse_id_var(),
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }
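
    # FUNCTION_PARSERS overrides generic call parsing for functions with special
    # argument syntax, e.g. CAST's 'AS <type>' (sketch of the resulting shape):
    #
    #     sqlglot.parse_one("SELECT CAST(x AS INT)")
    #     # -> ... exp.Cast(this=Column(x), to=DataType(this=Type.INT))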
lambda self: ("having", self._parse_having()), 1180 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1181 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1182 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1183 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1184 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1185 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1186 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1187 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1188 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1189 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1190 TokenType.CLUSTER_BY: lambda self: ( 1191 "cluster", 1192 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1193 ), 1194 TokenType.DISTRIBUTE_BY: lambda self: ( 1195 "distribute", 1196 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1197 ), 1198 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1199 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1200 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1201 } 1202 1203 SET_PARSERS = { 1204 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1205 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1206 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1207 "TRANSACTION": lambda self: self._parse_set_transaction(), 1208 } 1209 1210 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1211 1212 TYPE_LITERAL_PARSERS = { 1213 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1214 } 1215 1216 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1217 1218 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1219 1220 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1221 1222 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1223 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1224 "ISOLATION": ( 1225 ("LEVEL", "REPEATABLE", "READ"), 1226 ("LEVEL", "READ", "COMMITTED"), 1227 ("LEVEL", "READ", "UNCOMITTED"), 1228 ("LEVEL", "SERIALIZABLE"), 1229 ), 1230 "READ": ("WRITE", "ONLY"), 1231 } 1232 1233 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1234 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1235 ) 1236 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1237 1238 CREATE_SEQUENCE: OPTIONS_TYPE = { 1239 "SCALE": ("EXTEND", "NOEXTEND"), 1240 "SHARD": ("EXTEND", "NOEXTEND"), 1241 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1242 **dict.fromkeys( 1243 ( 1244 "SESSION", 1245 "GLOBAL", 1246 "KEEP", 1247 "NOKEEP", 1248 "ORDER", 1249 "NOORDER", 1250 "NOCACHE", 1251 "CYCLE", 1252 "NOCYCLE", 1253 "NOMINVALUE", 1254 "NOMAXVALUE", 1255 "NOSCALE", 1256 "NOSHARD", 1257 ), 1258 tuple(), 1259 ), 1260 } 1261 1262 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1263 1264 USABLES: OPTIONS_TYPE = dict.fromkeys( 1265 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1266 ) 1267 1268 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1269 1270 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1271 "TYPE": ("EVOLUTION",), 1272 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1273 } 1274 1275 PROCEDURE_OPTIONS: 

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
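
    # Hedged usage sketch: semicolon-separated statements come back as one tree per
    # statement:
    #
    #     tokens = Tokenizer().tokenize("SELECT a; SELECT b")
    #     Parser().parse(tokens, sql="SELECT a; SELECT b")
    #     # -> [exp.Select, exp.Select]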
1489 """ 1490 return self._parse( 1491 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1492 ) 1493 1494 def parse_into( 1495 self, 1496 expression_types: exp.IntoType, 1497 raw_tokens: t.List[Token], 1498 sql: t.Optional[str] = None, 1499 ) -> t.List[t.Optional[exp.Expression]]: 1500 """ 1501 Parses a list of tokens into a given Expression type. If a collection of Expression 1502 types is given instead, this method will try to parse the token list into each one 1503 of them, stopping at the first for which the parsing succeeds. 1504 1505 Args: 1506 expression_types: The expression type(s) to try and parse the token list into. 1507 raw_tokens: The list of tokens. 1508 sql: The original SQL string, used to produce helpful debug messages. 1509 1510 Returns: 1511 The target Expression. 1512 """ 1513 errors = [] 1514 for expression_type in ensure_list(expression_types): 1515 parser = self.EXPRESSION_PARSERS.get(expression_type) 1516 if not parser: 1517 raise TypeError(f"No parser registered for {expression_type}") 1518 1519 try: 1520 return self._parse(parser, raw_tokens, sql) 1521 except ParseError as e: 1522 e.errors[0]["into_expression"] = expression_type 1523 errors.append(e) 1524 1525 raise ParseError( 1526 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1527 errors=merge_errors(errors), 1528 ) from errors[-1] 1529 1530 def _parse( 1531 self, 1532 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1533 raw_tokens: t.List[Token], 1534 sql: t.Optional[str] = None, 1535 ) -> t.List[t.Optional[exp.Expression]]: 1536 self.reset() 1537 self.sql = sql or "" 1538 1539 total = len(raw_tokens) 1540 chunks: t.List[t.List[Token]] = [[]] 1541 1542 for i, token in enumerate(raw_tokens): 1543 if token.token_type == TokenType.SEMICOLON: 1544 if token.comments: 1545 chunks.append([token]) 1546 1547 if i < total - 1: 1548 chunks.append([]) 1549 else: 1550 chunks[-1].append(token) 1551 1552 expressions = [] 1553 1554 for tokens in chunks: 1555 self._index = -1 1556 self._tokens = tokens 1557 self._advance() 1558 1559 expressions.append(parse_method(self)) 1560 1561 if self._index < len(self._tokens): 1562 self.raise_error("Invalid expression / Unexpected token") 1563 1564 self.check_errors() 1565 1566 return expressions 1567 1568 def check_errors(self) -> None: 1569 """Logs or raises any found errors, depending on the chosen error level setting.""" 1570 if self.error_level == ErrorLevel.WARN: 1571 for error in self.errors: 1572 logger.error(str(error)) 1573 elif self.error_level == ErrorLevel.RAISE and self.errors: 1574 raise ParseError( 1575 concat_messages(self.errors, self.max_errors), 1576 errors=merge_errors(self.errors), 1577 ) 1578 1579 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1580 """ 1581 Appends an error in the list of recorded errors or raises it, depending on the chosen 1582 error level setting. 1583 """ 1584 token = token or self._curr or self._prev or Token.string("") 1585 start = token.start 1586 end = token.end + 1 1587 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1588 highlight = self.sql[start:end] 1589 end_context = self.sql[end : end + self.error_message_context] 1590 1591 error = ParseError.new( 1592 f"{message}. 

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )
1693 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1694 solve this by setting & resetting the parser state accordingly 1695 """ 1696 index = self._index 1697 error_level = self.error_level 1698 1699 self.error_level = ErrorLevel.IMMEDIATE 1700 try: 1701 this = parse_method() 1702 except ParseError: 1703 this = None 1704 finally: 1705 if not this or retreat: 1706 self._retreat(index) 1707 self.error_level = error_level 1708 1709 return this 1710 1711 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1712 start = self._prev 1713 exists = self._parse_exists() if allow_exists else None 1714 1715 self._match(TokenType.ON) 1716 1717 materialized = self._match_text_seq("MATERIALIZED") 1718 kind = self._match_set(self.CREATABLES) and self._prev 1719 if not kind: 1720 return self._parse_as_command(start) 1721 1722 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1723 this = self._parse_user_defined_function(kind=kind.token_type) 1724 elif kind.token_type == TokenType.TABLE: 1725 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1726 elif kind.token_type == TokenType.COLUMN: 1727 this = self._parse_column() 1728 else: 1729 this = self._parse_id_var() 1730 1731 self._match(TokenType.IS) 1732 1733 return self.expression( 1734 exp.Comment, 1735 this=this, 1736 kind=kind.text, 1737 expression=self._parse_string(), 1738 exists=exists, 1739 materialized=materialized, 1740 ) 1741 1742 def _parse_to_table( 1743 self, 1744 ) -> exp.ToTableProperty: 1745 table = self._parse_table_parts(schema=True) 1746 return self.expression(exp.ToTableProperty, this=table) 1747 1748 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1749 def _parse_ttl(self) -> exp.Expression: 1750 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1751 this = self._parse_bitwise() 1752 1753 if self._match_text_seq("DELETE"): 1754 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1755 if self._match_text_seq("RECOMPRESS"): 1756 return self.expression( 1757 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1758 ) 1759 if self._match_text_seq("TO", "DISK"): 1760 return self.expression( 1761 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1762 ) 1763 if self._match_text_seq("TO", "VOLUME"): 1764 return self.expression( 1765 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1766 ) 1767 1768 return this 1769 1770 expressions = self._parse_csv(_parse_ttl_action) 1771 where = self._parse_where() 1772 group = self._parse_group() 1773 1774 aggregates = None 1775 if group and self._match(TokenType.SET): 1776 aggregates = self._parse_csv(self._parse_set_item) 1777 1778 return self.expression( 1779 exp.MergeTreeTTL, 1780 expressions=expressions, 1781 where=where, 1782 group=group, 1783 aggregates=aggregates, 1784 ) 1785 1786 def _parse_statement(self) -> t.Optional[exp.Expression]: 1787 if self._curr is None: 1788 return None 1789 1790 if self._match_set(self.STATEMENT_PARSERS): 1791 comments = self._prev_comments 1792 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1793 stmt.add_comments(comments, prepend=True) 1794 return stmt 1795 1796 if self._match_set(self.dialect.tokenizer.COMMANDS): 1797 return self._parse_command() 1798 1799 expression = self._parse_expression() 1800 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1801 return self._parse_query_modifiers(expression) 
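    # Illustrative sketch (editorial comment, not upstream code): _parse_statement
    # dispatches on the first token via STATEMENT_PARSERS and otherwise falls
    # through to expression/SELECT parsing, so via the public entry point:
    #
    #   import sqlglot
    #   sqlglot.parse_one("DROP TABLE t")  # first token matches STATEMENT_PARSERS -> exp.Drop
    #   sqlglot.parse_one("x + 1")         # no statement/command match -> expression fallthrough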
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
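            # (illustrative: for a BigQuery JavaScript UDF, post-schema properties
            # such as LANGUAGE js sit between the signature and the AS body)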
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
2008 exists=exists, 2009 properties=properties, 2010 indexes=indexes, 2011 no_schema_binding=no_schema_binding, 2012 begin=begin, 2013 end=end, 2014 clone=clone, 2015 concurrently=concurrently, 2016 clustered=clustered, 2017 ) 2018 2019 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2020 seq = exp.SequenceProperties() 2021 2022 options = [] 2023 index = self._index 2024 2025 while self._curr: 2026 self._match(TokenType.COMMA) 2027 if self._match_text_seq("INCREMENT"): 2028 self._match_text_seq("BY") 2029 self._match_text_seq("=") 2030 seq.set("increment", self._parse_term()) 2031 elif self._match_text_seq("MINVALUE"): 2032 seq.set("minvalue", self._parse_term()) 2033 elif self._match_text_seq("MAXVALUE"): 2034 seq.set("maxvalue", self._parse_term()) 2035 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2036 self._match_text_seq("=") 2037 seq.set("start", self._parse_term()) 2038 elif self._match_text_seq("CACHE"): 2039 # T-SQL allows empty CACHE which is initialized dynamically 2040 seq.set("cache", self._parse_number() or True) 2041 elif self._match_text_seq("OWNED", "BY"): 2042 # "OWNED BY NONE" is the default 2043 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2044 else: 2045 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2046 if opt: 2047 options.append(opt) 2048 else: 2049 break 2050 2051 seq.set("options", options if options else None) 2052 return None if self._index == index else seq 2053 2054 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2055 # only used for teradata currently 2056 self._match(TokenType.COMMA) 2057 2058 kwargs = { 2059 "no": self._match_text_seq("NO"), 2060 "dual": self._match_text_seq("DUAL"), 2061 "before": self._match_text_seq("BEFORE"), 2062 "default": self._match_text_seq("DEFAULT"), 2063 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2064 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2065 "after": self._match_text_seq("AFTER"), 2066 "minimum": self._match_texts(("MIN", "MINIMUM")), 2067 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2068 } 2069 2070 if self._match_texts(self.PROPERTY_PARSERS): 2071 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2072 try: 2073 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2074 except TypeError: 2075 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2076 2077 return None 2078 2079 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2080 return self._parse_wrapped_csv(self._parse_property) 2081 2082 def _parse_property(self) -> t.Optional[exp.Expression]: 2083 if self._match_texts(self.PROPERTY_PARSERS): 2084 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2085 2086 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2087 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2088 2089 if self._match_text_seq("COMPOUND", "SORTKEY"): 2090 return self._parse_sortkey(compound=True) 2091 2092 if self._match_text_seq("SQL", "SECURITY"): 2093 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2094 2095 index = self._index 2096 key = self._parse_column() 2097 2098 if not self._match(TokenType.EQ): 2099 self._retreat(index) 2100 return self._parse_sequence_properties() 2101 2102 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2103 if isinstance(key, exp.Column): 2104 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2105 2106 value = self._parse_bitwise() or self._parse_var(any_token=True) 2107 2108 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2109 if isinstance(value, exp.Column): 2110 value = exp.var(value.name) 2111 2112 return self.expression(exp.Property, this=key, value=value) 2113 2114 def _parse_stored(self) -> exp.FileFormatProperty: 2115 self._match(TokenType.ALIAS) 2116 2117 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2118 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2119 2120 return self.expression( 2121 exp.FileFormatProperty, 2122 this=( 2123 self.expression( 2124 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2125 ) 2126 if input_format or output_format 2127 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2128 ), 2129 ) 2130 2131 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2132 field = self._parse_field() 2133 if isinstance(field, exp.Identifier) and not field.quoted: 2134 field = exp.var(field) 2135 2136 return field 2137 2138 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2139 self._match(TokenType.EQ) 2140 self._match(TokenType.ALIAS) 2141 2142 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2143 2144 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2145 properties = [] 2146 while True: 2147 if before: 2148 prop = self._parse_property_before() 2149 else: 2150 prop = self._parse_property() 2151 if not prop: 2152 break 2153 for p in ensure_list(prop): 2154 properties.append(p) 2155 2156 if properties: 2157 return self.expression(exp.Properties, expressions=properties) 2158 2159 return None 2160 2161 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2162 return self.expression( 2163 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2164 ) 2165 2166 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2167 if self._match_texts(("DEFINER", "INVOKER")): 2168 security_specifier = self._prev.text.upper() 2169 return self.expression(exp.SecurityProperty, this=security_specifier) 2170 return None 2171 2172 def _parse_settings_property(self) -> exp.SettingsProperty: 2173 return self.expression( 2174 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2175 ) 2176 2177 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2178 if self._index >= 2: 2179 pre_volatile_token = self._tokens[self._index - 2] 2180 else: 2181 pre_volatile_token = None 2182 2183 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2184 return exp.VolatileProperty() 2185 2186 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2187 2188 def _parse_retention_period(self) -> exp.Var: 2189 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2190 number = self._parse_number() 2191 number_str = f"{number} " if number else "" 2192 unit = self._parse_var(any_token=True) 2193 return exp.var(f"{number_str}{unit}") 2194 2195 def _parse_system_versioning_property( 2196 self, with_: bool = False 2197 ) -> exp.WithSystemVersioningProperty: 2198 self._match(TokenType.EQ) 2199 prop = self.expression( 2200 exp.WithSystemVersioningProperty, 2201 **{ # type: ignore 2202 "on": 
True, 2203 "with": with_, 2204 }, 2205 ) 2206 2207 if self._match_text_seq("OFF"): 2208 prop.set("on", False) 2209 return prop 2210 2211 self._match(TokenType.ON) 2212 if self._match(TokenType.L_PAREN): 2213 while self._curr and not self._match(TokenType.R_PAREN): 2214 if self._match_text_seq("HISTORY_TABLE", "="): 2215 prop.set("this", self._parse_table_parts()) 2216 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2217 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2218 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2219 prop.set("retention_period", self._parse_retention_period()) 2220 2221 self._match(TokenType.COMMA) 2222 2223 return prop 2224 2225 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2226 self._match(TokenType.EQ) 2227 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2228 prop = self.expression(exp.DataDeletionProperty, on=on) 2229 2230 if self._match(TokenType.L_PAREN): 2231 while self._curr and not self._match(TokenType.R_PAREN): 2232 if self._match_text_seq("FILTER_COLUMN", "="): 2233 prop.set("filter_column", self._parse_column()) 2234 elif self._match_text_seq("RETENTION_PERIOD", "="): 2235 prop.set("retention_period", self._parse_retention_period()) 2236 2237 self._match(TokenType.COMMA) 2238 2239 return prop 2240 2241 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2242 kind = "HASH" 2243 expressions: t.Optional[t.List[exp.Expression]] = None 2244 if self._match_text_seq("BY", "HASH"): 2245 expressions = self._parse_wrapped_csv(self._parse_id_var) 2246 elif self._match_text_seq("BY", "RANDOM"): 2247 kind = "RANDOM" 2248 2249 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2250 buckets: t.Optional[exp.Expression] = None 2251 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2252 buckets = self._parse_number() 2253 2254 return self.expression( 2255 exp.DistributedByProperty, 2256 expressions=expressions, 2257 kind=kind, 2258 buckets=buckets, 2259 order=self._parse_order(), 2260 ) 2261 2262 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2263 self._match_text_seq("KEY") 2264 expressions = self._parse_wrapped_id_vars() 2265 return self.expression(expr_type, expressions=expressions) 2266 2267 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2268 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2269 prop = self._parse_system_versioning_property(with_=True) 2270 self._match_r_paren() 2271 return prop 2272 2273 if self._match(TokenType.L_PAREN, advance=False): 2274 return self._parse_wrapped_properties() 2275 2276 if self._match_text_seq("JOURNAL"): 2277 return self._parse_withjournaltable() 2278 2279 if self._match_texts(self.VIEW_ATTRIBUTES): 2280 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2281 2282 if self._match_text_seq("DATA"): 2283 return self._parse_withdata(no=False) 2284 elif self._match_text_seq("NO", "DATA"): 2285 return self._parse_withdata(no=True) 2286 2287 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2288 return self._parse_serde_properties(with_=True) 2289 2290 if self._match(TokenType.SCHEMA): 2291 return self.expression( 2292 exp.WithSchemaBindingProperty, 2293 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2294 ) 2295 2296 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2297 return self.expression( 2298 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2299 ) 2300 2301 if not self._next: 2302 return None 2303 2304 return self._parse_withisolatedloading() 2305 2306 def _parse_procedure_option(self) -> exp.Expression | None: 2307 if self._match_text_seq("EXECUTE", "AS"): 2308 return self.expression( 2309 exp.ExecuteAsProperty, 2310 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2311 or self._parse_string(), 2312 ) 2313 2314 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2315 2316 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2317 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2318 self._match(TokenType.EQ) 2319 2320 user = self._parse_id_var() 2321 self._match(TokenType.PARAMETER) 2322 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2323 2324 if not user or not host: 2325 return None 2326 2327 return exp.DefinerProperty(this=f"{user}@{host}") 2328 2329 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2330 self._match(TokenType.TABLE) 2331 self._match(TokenType.EQ) 2332 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2333 2334 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2335 return self.expression(exp.LogProperty, no=no) 2336 2337 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2338 return self.expression(exp.JournalProperty, **kwargs) 2339 2340 def _parse_checksum(self) -> exp.ChecksumProperty: 2341 self._match(TokenType.EQ) 2342 2343 on = None 2344 if self._match(TokenType.ON): 2345 on = True 2346 elif self._match_text_seq("OFF"): 2347 on = False 2348 2349 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2350 2351 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2352 return self.expression( 2353 exp.Cluster, 2354 expressions=( 2355 self._parse_wrapped_csv(self._parse_ordered) 2356 if wrapped 2357 else self._parse_csv(self._parse_ordered) 2358 ), 2359 ) 2360 2361 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2362 self._match_text_seq("BY") 2363 2364 self._match_l_paren() 2365 expressions = self._parse_csv(self._parse_column) 2366 self._match_r_paren() 2367 2368 if self._match_text_seq("SORTED", "BY"): 2369 self._match_l_paren() 2370 sorted_by = self._parse_csv(self._parse_ordered) 2371 self._match_r_paren() 2372 else: 2373 sorted_by = None 2374 2375 self._match(TokenType.INTO) 2376 buckets = self._parse_number() 2377 self._match_text_seq("BUCKETS") 2378 2379 return self.expression( 2380 exp.ClusteredByProperty, 2381 expressions=expressions, 2382 sorted_by=sorted_by, 2383 buckets=buckets, 2384 ) 2385 2386 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2387 if not self._match_text_seq("GRANTS"): 2388 self._retreat(self._index - 1) 2389 return None 2390 2391 return self.expression(exp.CopyGrantsProperty) 2392 2393 def _parse_freespace(self) -> exp.FreespaceProperty: 2394 self._match(TokenType.EQ) 2395 return self.expression( 2396 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2397 ) 2398 2399 def _parse_mergeblockratio( 2400 self, no: bool = False, default: bool = False 2401 ) -> exp.MergeBlockRatioProperty: 2402 if self._match(TokenType.EQ): 2403 return self.expression( 2404 exp.MergeBlockRatioProperty, 2405 this=self._parse_number(), 2406 percent=self._match(TokenType.PERCENT), 2407 ) 2408 2409 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2410 2411 def _parse_datablocksize( 2412 self, 2413 default: t.Optional[bool] = None, 2414 minimum: t.Optional[bool] = None, 2415 maximum: t.Optional[bool] = None, 2416 ) -> exp.DataBlocksizeProperty: 2417 self._match(TokenType.EQ) 2418 size = self._parse_number() 2419 2420 units = None 2421 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2422 units = self._prev.text 2423 2424 return self.expression( 2425 exp.DataBlocksizeProperty, 2426 size=size, 2427 units=units, 2428 default=default, 2429 minimum=minimum, 2430 maximum=maximum, 2431 ) 2432 2433 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2434 self._match(TokenType.EQ) 2435 always = self._match_text_seq("ALWAYS") 2436 manual = self._match_text_seq("MANUAL") 2437 never = self._match_text_seq("NEVER") 2438 default = self._match_text_seq("DEFAULT") 2439 2440 autotemp = None 2441 if self._match_text_seq("AUTOTEMP"): 2442 autotemp = self._parse_schema() 2443 2444 return self.expression( 2445 exp.BlockCompressionProperty, 2446 always=always, 2447 manual=manual, 2448 never=never, 2449 default=default, 2450 autotemp=autotemp, 2451 ) 2452 2453 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2454 index = self._index 2455 no = self._match_text_seq("NO") 2456 concurrent = self._match_text_seq("CONCURRENT") 2457 2458 if not self._match_text_seq("ISOLATED", "LOADING"): 2459 self._retreat(index) 2460 return None 2461 2462 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2463 return self.expression( 2464 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2465 ) 2466 2467 def _parse_locking(self) -> exp.LockingProperty: 2468 if self._match(TokenType.TABLE): 2469 kind = "TABLE" 2470 elif self._match(TokenType.VIEW): 2471 kind = "VIEW" 2472 elif self._match(TokenType.ROW): 2473 kind = "ROW" 2474 elif self._match_text_seq("DATABASE"): 2475 kind = "DATABASE" 2476 else: 2477 kind = None 2478 2479 if kind in ("DATABASE", "TABLE", "VIEW"): 2480 this = self._parse_table_parts() 2481 else: 2482 this = None 2483 2484 if self._match(TokenType.FOR): 2485 for_or_in = "FOR" 2486 elif self._match(TokenType.IN): 2487 for_or_in = "IN" 2488 else: 2489 for_or_in = None 2490 2491 if self._match_text_seq("ACCESS"): 2492 lock_type = "ACCESS" 2493 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2494 lock_type = "EXCLUSIVE" 2495 elif self._match_text_seq("SHARE"): 2496 lock_type = "SHARE" 2497 elif self._match_text_seq("READ"): 2498 lock_type = "READ" 2499 elif self._match_text_seq("WRITE"): 2500 lock_type = "WRITE" 2501 elif self._match_text_seq("CHECKSUM"): 2502 lock_type = "CHECKSUM" 2503 else: 2504 lock_type = None 2505 2506 override = self._match_text_seq("OVERRIDE") 2507 2508 return self.expression( 2509 exp.LockingProperty, 2510 this=this, 2511 kind=kind, 2512 for_or_in=for_or_in, 2513 lock_type=lock_type, 2514 override=override, 2515 ) 2516 2517 def _parse_partition_by(self) -> t.List[exp.Expression]: 2518 if self._match(TokenType.PARTITION_BY): 2519 return self._parse_csv(self._parse_assignment) 2520 return [] 2521 2522 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2523 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2524 if self._match_text_seq("MINVALUE"): 2525 return exp.var("MINVALUE") 2526 if self._match_text_seq("MAXVALUE"): 2527 return exp.var("MAXVALUE") 2528 return self._parse_bitwise() 2529 2530 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2531 expression = None 
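        # The branches below correspond to PostgreSQL's three partition bound
        # forms, e.g. (illustrative):
        #   FOR VALUES IN (1, 2)
        #   FOR VALUES FROM (MINVALUE) TO (10)
        #   FOR VALUES WITH (MODULUS 4, REMAINDER 0)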
2532 from_expressions = None 2533 to_expressions = None 2534 2535 if self._match(TokenType.IN): 2536 this = self._parse_wrapped_csv(self._parse_bitwise) 2537 elif self._match(TokenType.FROM): 2538 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2539 self._match_text_seq("TO") 2540 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2541 elif self._match_text_seq("WITH", "(", "MODULUS"): 2542 this = self._parse_number() 2543 self._match_text_seq(",", "REMAINDER") 2544 expression = self._parse_number() 2545 self._match_r_paren() 2546 else: 2547 self.raise_error("Failed to parse partition bound spec.") 2548 2549 return self.expression( 2550 exp.PartitionBoundSpec, 2551 this=this, 2552 expression=expression, 2553 from_expressions=from_expressions, 2554 to_expressions=to_expressions, 2555 ) 2556 2557 # https://www.postgresql.org/docs/current/sql-createtable.html 2558 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2559 if not self._match_text_seq("OF"): 2560 self._retreat(self._index - 1) 2561 return None 2562 2563 this = self._parse_table(schema=True) 2564 2565 if self._match(TokenType.DEFAULT): 2566 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2567 elif self._match_text_seq("FOR", "VALUES"): 2568 expression = self._parse_partition_bound_spec() 2569 else: 2570 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2571 2572 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2573 2574 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2575 self._match(TokenType.EQ) 2576 return self.expression( 2577 exp.PartitionedByProperty, 2578 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2579 ) 2580 2581 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2582 if self._match_text_seq("AND", "STATISTICS"): 2583 statistics = True 2584 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2585 statistics = False 2586 else: 2587 statistics = None 2588 2589 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2590 2591 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2592 if self._match_text_seq("SQL"): 2593 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2594 return None 2595 2596 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2597 if self._match_text_seq("SQL", "DATA"): 2598 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2599 return None 2600 2601 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2602 if self._match_text_seq("PRIMARY", "INDEX"): 2603 return exp.NoPrimaryIndexProperty() 2604 if self._match_text_seq("SQL"): 2605 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2606 return None 2607 2608 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2609 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2610 return exp.OnCommitProperty() 2611 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2612 return exp.OnCommitProperty(delete=True) 2613 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2614 2615 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2616 if self._match_text_seq("SQL", "DATA"): 2617 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2618 return None 2619 2620 def _parse_distkey(self) -> exp.DistKeyProperty: 2621 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 2622 2623 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2624 table = self._parse_table(schema=True) 2625 2626 options = [] 2627 while self._match_texts(("INCLUDING", "EXCLUDING")): 2628 this = self._prev.text.upper() 2629 2630 id_var = self._parse_id_var() 2631 if not id_var: 2632 return None 2633 2634 options.append( 2635 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2636 ) 2637 2638 return self.expression(exp.LikeProperty, this=table, expressions=options) 2639 2640 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2641 return self.expression( 2642 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2643 ) 2644 2645 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2646 self._match(TokenType.EQ) 2647 return self.expression( 2648 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2649 ) 2650 2651 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2652 self._match_text_seq("WITH", "CONNECTION") 2653 return self.expression( 2654 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2655 ) 2656 2657 def _parse_returns(self) -> exp.ReturnsProperty: 2658 value: t.Optional[exp.Expression] 2659 null = None 2660 is_table = self._match(TokenType.TABLE) 2661 2662 if is_table: 2663 if self._match(TokenType.LT): 2664 value = self.expression( 2665 exp.Schema, 2666 this="TABLE", 2667 expressions=self._parse_csv(self._parse_struct_types), 2668 ) 2669 if not self._match(TokenType.GT): 2670 self.raise_error("Expecting >") 2671 else: 2672 value = self._parse_schema(exp.var("TABLE")) 2673 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2674 null = True 2675 value = None 2676 else: 2677 value = self._parse_types() 2678 2679 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2680 2681 def _parse_describe(self) -> exp.Describe: 2682 kind = self._match_set(self.CREATABLES) and self._prev.text 2683 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2684 if self._match(TokenType.DOT): 2685 style = None 2686 self._retreat(self._index - 2) 2687 2688 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2689 2690 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2691 this = self._parse_statement() 2692 else: 2693 this = self._parse_table(schema=True) 2694 2695 properties = self._parse_properties() 2696 expressions = properties.expressions if properties else None 2697 partition = self._parse_partition() 2698 return self.expression( 2699 exp.Describe, 2700 this=this, 2701 style=style, 2702 kind=kind, 2703 expressions=expressions, 2704 partition=partition, 2705 format=format, 2706 ) 2707 2708 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2709 kind = self._prev.text.upper() 2710 expressions = [] 2711 2712 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2713 if self._match(TokenType.WHEN): 2714 expression = self._parse_disjunction() 2715 self._match(TokenType.THEN) 2716 else: 2717 expression = None 2718 2719 else_ = self._match(TokenType.ELSE) 2720 2721 if not self._match(TokenType.INTO): 2722 return None 2723 2724 return self.expression( 2725 exp.ConditionalInsert, 2726 this=self.expression( 2727 exp.Insert, 2728 this=self._parse_table(schema=True), 2729 
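                    # each WHEN/ELSE branch becomes its own exp.Insert: the target
                    # table parsed above plus the optional VALUES clause parsed next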
expression=self._parse_derived_table_values(), 2730 ), 2731 expression=expression, 2732 else_=else_, 2733 ) 2734 2735 expression = parse_conditional_insert() 2736 while expression is not None: 2737 expressions.append(expression) 2738 expression = parse_conditional_insert() 2739 2740 return self.expression( 2741 exp.MultitableInserts, 2742 kind=kind, 2743 comments=comments, 2744 expressions=expressions, 2745 source=self._parse_table(), 2746 ) 2747 2748 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2749 comments = [] 2750 hint = self._parse_hint() 2751 overwrite = self._match(TokenType.OVERWRITE) 2752 ignore = self._match(TokenType.IGNORE) 2753 local = self._match_text_seq("LOCAL") 2754 alternative = None 2755 is_function = None 2756 2757 if self._match_text_seq("DIRECTORY"): 2758 this: t.Optional[exp.Expression] = self.expression( 2759 exp.Directory, 2760 this=self._parse_var_or_string(), 2761 local=local, 2762 row_format=self._parse_row_format(match_row=True), 2763 ) 2764 else: 2765 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2766 comments += ensure_list(self._prev_comments) 2767 return self._parse_multitable_inserts(comments) 2768 2769 if self._match(TokenType.OR): 2770 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2771 2772 self._match(TokenType.INTO) 2773 comments += ensure_list(self._prev_comments) 2774 self._match(TokenType.TABLE) 2775 is_function = self._match(TokenType.FUNCTION) 2776 2777 this = ( 2778 self._parse_table(schema=True, parse_partition=True) 2779 if not is_function 2780 else self._parse_function() 2781 ) 2782 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2783 this.set("alias", self._parse_table_alias()) 2784 2785 returning = self._parse_returning() 2786 2787 return self.expression( 2788 exp.Insert, 2789 comments=comments, 2790 hint=hint, 2791 is_function=is_function, 2792 this=this, 2793 stored=self._match_text_seq("STORED") and self._parse_stored(), 2794 by_name=self._match_text_seq("BY", "NAME"), 2795 exists=self._parse_exists(), 2796 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2797 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2798 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2799 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2800 conflict=self._parse_on_conflict(), 2801 returning=returning or self._parse_returning(), 2802 overwrite=overwrite, 2803 alternative=alternative, 2804 ignore=ignore, 2805 source=self._match(TokenType.TABLE) and self._parse_table(), 2806 ) 2807 2808 def _parse_kill(self) -> exp.Kill: 2809 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2810 2811 return self.expression( 2812 exp.Kill, 2813 this=self._parse_primary(), 2814 kind=kind, 2815 ) 2816 2817 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2818 conflict = self._match_text_seq("ON", "CONFLICT") 2819 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2820 2821 if not conflict and not duplicate: 2822 return None 2823 2824 conflict_keys = None 2825 constraint = None 2826 2827 if conflict: 2828 if self._match_text_seq("ON", "CONSTRAINT"): 2829 constraint = self._parse_id_var() 2830 elif self._match(TokenType.L_PAREN): 2831 conflict_keys = self._parse_csv(self._parse_id_var) 2832 self._match_r_paren() 2833 2834 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2835 if 
self._prev.token_type == TokenType.UPDATE: 2836 self._match(TokenType.SET) 2837 expressions = self._parse_csv(self._parse_equality) 2838 else: 2839 expressions = None 2840 2841 return self.expression( 2842 exp.OnConflict, 2843 duplicate=duplicate, 2844 expressions=expressions, 2845 action=action, 2846 conflict_keys=conflict_keys, 2847 constraint=constraint, 2848 where=self._parse_where(), 2849 ) 2850 2851 def _parse_returning(self) -> t.Optional[exp.Returning]: 2852 if not self._match(TokenType.RETURNING): 2853 return None 2854 return self.expression( 2855 exp.Returning, 2856 expressions=self._parse_csv(self._parse_expression), 2857 into=self._match(TokenType.INTO) and self._parse_table_part(), 2858 ) 2859 2860 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2861 if not self._match(TokenType.FORMAT): 2862 return None 2863 return self._parse_row_format() 2864 2865 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2866 index = self._index 2867 with_ = with_ or self._match_text_seq("WITH") 2868 2869 if not self._match(TokenType.SERDE_PROPERTIES): 2870 self._retreat(index) 2871 return None 2872 return self.expression( 2873 exp.SerdeProperties, 2874 **{ # type: ignore 2875 "expressions": self._parse_wrapped_properties(), 2876 "with": with_, 2877 }, 2878 ) 2879 2880 def _parse_row_format( 2881 self, match_row: bool = False 2882 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2883 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2884 return None 2885 2886 if self._match_text_seq("SERDE"): 2887 this = self._parse_string() 2888 2889 serde_properties = self._parse_serde_properties() 2890 2891 return self.expression( 2892 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2893 ) 2894 2895 self._match_text_seq("DELIMITED") 2896 2897 kwargs = {} 2898 2899 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2900 kwargs["fields"] = self._parse_string() 2901 if self._match_text_seq("ESCAPED", "BY"): 2902 kwargs["escaped"] = self._parse_string() 2903 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2904 kwargs["collection_items"] = self._parse_string() 2905 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2906 kwargs["map_keys"] = self._parse_string() 2907 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2908 kwargs["lines"] = self._parse_string() 2909 if self._match_text_seq("NULL", "DEFINED", "AS"): 2910 kwargs["null"] = self._parse_string() 2911 2912 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2913 2914 def _parse_load(self) -> exp.LoadData | exp.Command: 2915 if self._match_text_seq("DATA"): 2916 local = self._match_text_seq("LOCAL") 2917 self._match_text_seq("INPATH") 2918 inpath = self._parse_string() 2919 overwrite = self._match(TokenType.OVERWRITE) 2920 self._match_pair(TokenType.INTO, TokenType.TABLE) 2921 2922 return self.expression( 2923 exp.LoadData, 2924 this=self._parse_table(schema=True), 2925 local=local, 2926 overwrite=overwrite, 2927 inpath=inpath, 2928 partition=self._parse_partition(), 2929 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2930 serde=self._match_text_seq("SERDE") and self._parse_string(), 2931 ) 2932 return self._parse_as_command(self._prev) 2933 2934 def _parse_delete(self) -> exp.Delete: 2935 # This handles MySQL's "Multiple-Table Syntax" 2936 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2937 tables 
= None 2938 if not self._match(TokenType.FROM, advance=False): 2939 tables = self._parse_csv(self._parse_table) or None 2940 2941 returning = self._parse_returning() 2942 2943 return self.expression( 2944 exp.Delete, 2945 tables=tables, 2946 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2947 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2948 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2949 where=self._parse_where(), 2950 returning=returning or self._parse_returning(), 2951 limit=self._parse_limit(), 2952 ) 2953 2954 def _parse_update(self) -> exp.Update: 2955 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2956 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2957 returning = self._parse_returning() 2958 return self.expression( 2959 exp.Update, 2960 **{ # type: ignore 2961 "this": this, 2962 "expressions": expressions, 2963 "from": self._parse_from(joins=True), 2964 "where": self._parse_where(), 2965 "returning": returning or self._parse_returning(), 2966 "order": self._parse_order(), 2967 "limit": self._parse_limit(), 2968 }, 2969 ) 2970 2971 def _parse_uncache(self) -> exp.Uncache: 2972 if not self._match(TokenType.TABLE): 2973 self.raise_error("Expecting TABLE after UNCACHE") 2974 2975 return self.expression( 2976 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2977 ) 2978 2979 def _parse_cache(self) -> exp.Cache: 2980 lazy = self._match_text_seq("LAZY") 2981 self._match(TokenType.TABLE) 2982 table = self._parse_table(schema=True) 2983 2984 options = [] 2985 if self._match_text_seq("OPTIONS"): 2986 self._match_l_paren() 2987 k = self._parse_string() 2988 self._match(TokenType.EQ) 2989 v = self._parse_string() 2990 options = [k, v] 2991 self._match_r_paren() 2992 2993 self._match(TokenType.ALIAS) 2994 return self.expression( 2995 exp.Cache, 2996 this=table, 2997 lazy=lazy, 2998 options=options, 2999 expression=self._parse_select(nested=True), 3000 ) 3001 3002 def _parse_partition(self) -> t.Optional[exp.Partition]: 3003 if not self._match_texts(self.PARTITION_KEYWORDS): 3004 return None 3005 3006 return self.expression( 3007 exp.Partition, 3008 subpartition=self._prev.text.upper() == "SUBPARTITION", 3009 expressions=self._parse_wrapped_csv(self._parse_assignment), 3010 ) 3011 3012 def _parse_value(self) -> t.Optional[exp.Tuple]: 3013 def _parse_value_expression() -> t.Optional[exp.Expression]: 3014 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3015 return exp.var(self._prev.text.upper()) 3016 return self._parse_expression() 3017 3018 if self._match(TokenType.L_PAREN): 3019 expressions = self._parse_csv(_parse_value_expression) 3020 self._match_r_paren() 3021 return self.expression(exp.Tuple, expressions=expressions) 3022 3023 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
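        # A lone expression is wrapped in a single-element Tuple below, so each
        # bare value parses as its own one-column row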
3024 expression = self._parse_expression() 3025 if expression: 3026 return self.expression(exp.Tuple, expressions=[expression]) 3027 return None 3028 3029 def _parse_projections(self) -> t.List[exp.Expression]: 3030 return self._parse_expressions() 3031 3032 def _parse_select( 3033 self, 3034 nested: bool = False, 3035 table: bool = False, 3036 parse_subquery_alias: bool = True, 3037 parse_set_operation: bool = True, 3038 ) -> t.Optional[exp.Expression]: 3039 cte = self._parse_with() 3040 3041 if cte: 3042 this = self._parse_statement() 3043 3044 if not this: 3045 self.raise_error("Failed to parse any statement following CTE") 3046 return cte 3047 3048 if "with" in this.arg_types: 3049 this.set("with", cte) 3050 else: 3051 self.raise_error(f"{this.key} does not support CTE") 3052 this = cte 3053 3054 return this 3055 3056 # duckdb supports leading with FROM x 3057 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3058 3059 if self._match(TokenType.SELECT): 3060 comments = self._prev_comments 3061 3062 hint = self._parse_hint() 3063 3064 if self._next and not self._next.token_type == TokenType.DOT: 3065 all_ = self._match(TokenType.ALL) 3066 distinct = self._match_set(self.DISTINCT_TOKENS) 3067 else: 3068 all_, distinct = None, None 3069 3070 kind = ( 3071 self._match(TokenType.ALIAS) 3072 and self._match_texts(("STRUCT", "VALUE")) 3073 and self._prev.text.upper() 3074 ) 3075 3076 if distinct: 3077 distinct = self.expression( 3078 exp.Distinct, 3079 on=self._parse_value() if self._match(TokenType.ON) else None, 3080 ) 3081 3082 if all_ and distinct: 3083 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3084 3085 operation_modifiers = [] 3086 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3087 operation_modifiers.append(exp.var(self._prev.text.upper())) 3088 3089 limit = self._parse_limit(top=True) 3090 projections = self._parse_projections() 3091 3092 this = self.expression( 3093 exp.Select, 3094 kind=kind, 3095 hint=hint, 3096 distinct=distinct, 3097 expressions=projections, 3098 limit=limit, 3099 operation_modifiers=operation_modifiers or None, 3100 ) 3101 this.comments = comments 3102 3103 into = self._parse_into() 3104 if into: 3105 this.set("into", into) 3106 3107 if not from_: 3108 from_ = self._parse_from() 3109 3110 if from_: 3111 this.set("from", from_) 3112 3113 this = self._parse_query_modifiers(this) 3114 elif (table or nested) and self._match(TokenType.L_PAREN): 3115 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3116 this = self._parse_simplified_pivot( 3117 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3118 ) 3119 elif self._match(TokenType.FROM): 3120 from_ = self._parse_from(skip_from_token=True) 3121 # Support parentheses for duckdb FROM-first syntax 3122 select = self._parse_select() 3123 if select: 3124 select.set("from", from_) 3125 this = select 3126 else: 3127 this = exp.select("*").from_(t.cast(exp.From, from_)) 3128 else: 3129 this = ( 3130 self._parse_table() 3131 if table 3132 else self._parse_select(nested=True, parse_set_operation=False) 3133 ) 3134 3135 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3136 # in case a modifier (e.g. 
join) is following 3137 if table and isinstance(this, exp.Values) and this.alias: 3138 alias = this.args["alias"].pop() 3139 this = exp.Table(this=this, alias=alias) 3140 3141 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3142 3143 self._match_r_paren() 3144 3145 # We return early here so that the UNION isn't attached to the subquery by the 3146 # following call to _parse_set_operations, but instead becomes the parent node 3147 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3148 elif self._match(TokenType.VALUES, advance=False): 3149 this = self._parse_derived_table_values() 3150 elif from_: 3151 this = exp.select("*").from_(from_.this, copy=False) 3152 elif self._match(TokenType.SUMMARIZE): 3153 table = self._match(TokenType.TABLE) 3154 this = self._parse_select() or self._parse_string() or self._parse_table() 3155 return self.expression(exp.Summarize, this=this, table=table) 3156 elif self._match(TokenType.DESCRIBE): 3157 this = self._parse_describe() 3158 elif self._match_text_seq("STREAM"): 3159 this = self._parse_function() 3160 if this: 3161 this = self.expression(exp.Stream, this=this) 3162 else: 3163 self._retreat(self._index - 1) 3164 else: 3165 this = None 3166 3167 return self._parse_set_operations(this) if parse_set_operation else this 3168 3169 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3170 if not skip_with_token and not self._match(TokenType.WITH): 3171 return None 3172 3173 comments = self._prev_comments 3174 recursive = self._match(TokenType.RECURSIVE) 3175 3176 last_comments = None 3177 expressions = [] 3178 while True: 3179 expressions.append(self._parse_cte()) 3180 if last_comments: 3181 expressions[-1].add_comments(last_comments) 3182 3183 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3184 break 3185 else: 3186 self._match(TokenType.WITH) 3187 3188 last_comments = self._prev_comments 3189 3190 return self.expression( 3191 exp.With, comments=comments, expressions=expressions, recursive=recursive 3192 ) 3193 3194 def _parse_cte(self) -> t.Optional[exp.CTE]: 3195 index = self._index 3196 3197 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3198 if not alias or not alias.this: 3199 self.raise_error("Expected CTE to have alias") 3200 3201 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3202 self._retreat(index) 3203 return None 3204 3205 comments = self._prev_comments 3206 3207 if self._match_text_seq("NOT", "MATERIALIZED"): 3208 materialized = False 3209 elif self._match_text_seq("MATERIALIZED"): 3210 materialized = True 3211 else: 3212 materialized = None 3213 3214 cte = self.expression( 3215 exp.CTE, 3216 this=self._parse_wrapped(self._parse_statement), 3217 alias=alias, 3218 materialized=materialized, 3219 comments=comments, 3220 ) 3221 3222 if isinstance(cte.this, exp.Values): 3223 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3224 3225 return cte 3226 3227 def _parse_table_alias( 3228 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3229 ) -> t.Optional[exp.TableAlias]: 3230 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3231 # so this section tries to parse the clause version and if it fails, it treats the token 3232 # as an identifier (alias) 3233 if self._can_parse_limit_or_offset(): 3234 return None 3235 3236 any_token = self._match(TokenType.ALIAS) 3237 alias = ( 3238 self._parse_id_var(any_token=any_token, tokens=alias_tokens or 
self.TABLE_ALIAS_TOKENS) 3239 or self._parse_string_as_identifier() 3240 ) 3241 3242 index = self._index 3243 if self._match(TokenType.L_PAREN): 3244 columns = self._parse_csv(self._parse_function_parameter) 3245 self._match_r_paren() if columns else self._retreat(index) 3246 else: 3247 columns = None 3248 3249 if not alias and not columns: 3250 return None 3251 3252 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3253 3254 # We bubble up comments from the Identifier to the TableAlias 3255 if isinstance(alias, exp.Identifier): 3256 table_alias.add_comments(alias.pop_comments()) 3257 3258 return table_alias 3259 3260 def _parse_subquery( 3261 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3262 ) -> t.Optional[exp.Subquery]: 3263 if not this: 3264 return None 3265 3266 return self.expression( 3267 exp.Subquery, 3268 this=this, 3269 pivots=self._parse_pivots(), 3270 alias=self._parse_table_alias() if parse_alias else None, 3271 sample=self._parse_table_sample(), 3272 ) 3273 3274 def _implicit_unnests_to_explicit(self, this: E) -> E: 3275 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3276 3277 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3278 for i, join in enumerate(this.args.get("joins") or []): 3279 table = join.this 3280 normalized_table = table.copy() 3281 normalized_table.meta["maybe_column"] = True 3282 normalized_table = _norm(normalized_table, dialect=self.dialect) 3283 3284 if isinstance(table, exp.Table) and not join.args.get("on"): 3285 if normalized_table.parts[0].name in refs: 3286 table_as_column = table.to_column() 3287 unnest = exp.Unnest(expressions=[table_as_column]) 3288 3289 # Table.to_column creates a parent Alias node that we want to convert to 3290 # a TableAlias and attach to the Unnest, so it matches the parser's output 3291 if isinstance(table.args.get("alias"), exp.TableAlias): 3292 table_as_column.replace(table_as_column.this) 3293 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3294 3295 table.replace(unnest) 3296 3297 refs.add(normalized_table.alias_or_name) 3298 3299 return this 3300 3301 def _parse_query_modifiers( 3302 self, this: t.Optional[exp.Expression] 3303 ) -> t.Optional[exp.Expression]: 3304 if isinstance(this, (exp.Query, exp.Table)): 3305 for join in self._parse_joins(): 3306 this.append("joins", join) 3307 for lateral in iter(self._parse_lateral, None): 3308 this.append("laterals", lateral) 3309 3310 while True: 3311 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3312 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3313 key, expression = parser(self) 3314 3315 if expression: 3316 this.set(key, expression) 3317 if key == "limit": 3318 offset = expression.args.pop("offset", None) 3319 3320 if offset: 3321 offset = exp.Offset(expression=offset) 3322 this.set("offset", offset) 3323 3324 limit_by_expressions = expression.expressions 3325 expression.set("expressions", None) 3326 offset.set("expressions", limit_by_expressions) 3327 continue 3328 break 3329 3330 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3331 this = self._implicit_unnests_to_explicit(this) 3332 3333 return this 3334 3335 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3336 start = self._curr 3337 while self._curr: 3338 self._advance() 3339 3340 end = self._tokens[self._index - 1] 3341 return exp.Hint(expressions=[self._find_sql(start, end)]) 3342 3343 def 
_parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3344 return self._parse_function_call() 3345 3346 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3347 start_index = self._index 3348 should_fallback_to_string = False 3349 3350 hints = [] 3351 try: 3352 for hint in iter( 3353 lambda: self._parse_csv( 3354 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3355 ), 3356 [], 3357 ): 3358 hints.extend(hint) 3359 except ParseError: 3360 should_fallback_to_string = True 3361 3362 if should_fallback_to_string or self._curr: 3363 self._retreat(start_index) 3364 return self._parse_hint_fallback_to_string() 3365 3366 return self.expression(exp.Hint, expressions=hints) 3367 3368 def _parse_hint(self) -> t.Optional[exp.Hint]: 3369 if self._match(TokenType.HINT) and self._prev_comments: 3370 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3371 3372 return None 3373 3374 def _parse_into(self) -> t.Optional[exp.Into]: 3375 if not self._match(TokenType.INTO): 3376 return None 3377 3378 temp = self._match(TokenType.TEMPORARY) 3379 unlogged = self._match_text_seq("UNLOGGED") 3380 self._match(TokenType.TABLE) 3381 3382 return self.expression( 3383 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3384 ) 3385 3386 def _parse_from( 3387 self, joins: bool = False, skip_from_token: bool = False 3388 ) -> t.Optional[exp.From]: 3389 if not skip_from_token and not self._match(TokenType.FROM): 3390 return None 3391 3392 return self.expression( 3393 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3394 ) 3395 3396 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3397 return self.expression( 3398 exp.MatchRecognizeMeasure, 3399 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3400 this=self._parse_expression(), 3401 ) 3402 3403 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3404 if not self._match(TokenType.MATCH_RECOGNIZE): 3405 return None 3406 3407 self._match_l_paren() 3408 3409 partition = self._parse_partition_by() 3410 order = self._parse_order() 3411 3412 measures = ( 3413 self._parse_csv(self._parse_match_recognize_measure) 3414 if self._match_text_seq("MEASURES") 3415 else None 3416 ) 3417 3418 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3419 rows = exp.var("ONE ROW PER MATCH") 3420 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3421 text = "ALL ROWS PER MATCH" 3422 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3423 text += " SHOW EMPTY MATCHES" 3424 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3425 text += " OMIT EMPTY MATCHES" 3426 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3427 text += " WITH UNMATCHED ROWS" 3428 rows = exp.var(text) 3429 else: 3430 rows = None 3431 3432 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3433 text = "AFTER MATCH SKIP" 3434 if self._match_text_seq("PAST", "LAST", "ROW"): 3435 text += " PAST LAST ROW" 3436 elif self._match_text_seq("TO", "NEXT", "ROW"): 3437 text += " TO NEXT ROW" 3438 elif self._match_text_seq("TO", "FIRST"): 3439 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3440 elif self._match_text_seq("TO", "LAST"): 3441 text += f" TO LAST {self._advance_any().text}" # type: ignore 3442 after = exp.var(text) 3443 else: 3444 after = None 3445 3446 if self._match_text_seq("PATTERN"): 3447 self._match_l_paren() 3448 3449 if not self._curr: 3450 self.raise_error("Expecting )", self._curr) 3451 3452 
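            # The PATTERN body is a row-pattern regex rather than ordinary SQL, so
            # it is captured verbatim: balance parentheses over the raw token
            # stream, then slice the matched text back out of self.sql via _find_sql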

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )
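
    # Illustrative sketch (not part of the upstream source): CROSS/OUTER APPLY and
    # LATERAL both land on exp.Lateral, with `cross_apply` distinguishing the variants.
    #
    #   import sqlglot
    #   q = "SELECT * FROM a CROSS APPLY (SELECT * FROM b) AS t"
    #   lateral = sqlglot.parse_one(q, read="tsql").find(sqlglot.exp.Lateral)
    #   # lateral.args.get("cross_apply") is expected to be True here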

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )
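
    # Illustrative sketch (not part of the upstream source): a comma in the FROM
    # clause is parsed as an implicit exp.Join with no side/kind qualifiers.
    #
    #   import sqlglot
    #   joins = sqlglot.parse_one("SELECT * FROM a, b").args["joins"]
    #   # joins[0] is an exp.Join whose `this` is the table `b`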

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
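
    # Illustrative sketch (not part of the upstream source): a three-part name fills
    # the catalog/db/this slots of the resulting exp.Table.
    #
    #   import sqlglot
    #   table = sqlglot.parse_one("SELECT 1 FROM cat.db.tbl").find(sqlglot.exp.Table)
    #   # table.catalog == "cat", table.db == "db", table.name == "tbl"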

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
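
    # Illustrative sketch (not part of the upstream source): _parse_table tries the
    # more specific parsers first, so a parenthesized SELECT in FROM comes back as an
    # aliased exp.Subquery rather than an exp.Table.
    #
    #   import sqlglot
    #   sub = sqlglot.parse_one("SELECT * FROM (SELECT 1) AS s").args["from"].this
    #   # isinstance(sub, sqlglot.exp.Subquery) and sub.alias == "s"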

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
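
    # Illustrative sketch (not part of the upstream source): BigQuery's WITH OFFSET
    # clause is captured in the Unnest node's `offset` arg.
    #
    #   import sqlglot
    #   q = "SELECT * FROM UNNEST([1, 2]) AS x WITH OFFSET AS pos"
    #   unnest = sqlglot.parse_one(q, read="bigquery").find(sqlglot.exp.Unnest)
    #   # unnest.args.get("offset") should be the `pos` identifier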

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )
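
    # Illustrative sketch (not part of the upstream source): an explicit PERCENT
    # qualifier routes the sampled number into the `percent` arg.
    #
    #   import sqlglot
    #   q = "SELECT * FROM t TABLESAMPLE (10 PERCENT)"
    #   sample = sqlglot.parse_one(q).find(sqlglot.exp.TableSample)
    #   # sample.args.get("percent") holds the literal 10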

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()
        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
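
    # Illustrative sketch (not part of the upstream source): a Snowflake-style PIVOT
    # attaches an exp.Pivot whose `field` is the exp.In built by _parse_pivot_in.
    #
    #   import sqlglot
    #   q = "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b'))"
    #   pivot = sqlglot.parse_one(q, read="snowflake").find(sqlglot.exp.Pivot)
    #   # pivot.args["field"] is an exp.In over the 'a'/'b' literals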

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_assignment())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())
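
    # Illustrative sketch (not part of the upstream source): plain keys and
    # CUBE/ROLLUP/GROUPING SETS land in separate args of exp.Group.
    #
    #   import sqlglot
    #   group = sqlglot.parse_one("SELECT a FROM t GROUP BY ROLLUP (a)").find(sqlglot.exp.Group)
    #   # group.args["rollup"] is a one-element list holding an exp.Rollup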

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
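
    # Illustrative sketch (not part of the upstream source): with no explicit NULLS
    # FIRST/LAST, nulls_first is inferred from the dialect's NULL_ORDERING.
    #
    #   import sqlglot
    #   ordered = sqlglot.parse_one("SELECT a FROM t ORDER BY a").find(sqlglot.exp.Ordered)
    #   # under the default "nulls_are_small" ordering, ordered.args["nulls_first"] is True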

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
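
    # Illustrative sketch (not part of the upstream source): MySQL's `LIMIT offset,
    # count` form keeps the count in `expression` and moves the offset into `offset`.
    #
    #   import sqlglot
    #   limit = sqlglot.parse_one("SELECT a FROM t LIMIT 5, 10", read="mysql").find(sqlglot.exp.Limit)
    #   # limit.expression is the literal 10; limit.args["offset"] is the literal 5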

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation: t.Type[exp.SetOperation] = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments

            if self._match(TokenType.DISTINCT):
                distinct: t.Optional[bool] = True
            elif self._match(TokenType.ALL):
                distinct = False
            else:
                distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
                if distinct is None:
                    self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
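
    # Illustrative sketch (not part of the upstream source): without DISTINCT/ALL the
    # dialect's SET_OP_DISTINCT_BY_DEFAULT supplies the `distinct` flag.
    #
    #   import sqlglot
    #   union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
    #   # isinstance(union, sqlglot.exp.Union); union.args["distinct"] defaults to True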

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
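
    # Illustrative sketch (not part of the upstream source): IS DISTINCT FROM becomes
    # a null-safe comparison instead of an exp.Is node.
    #
    #   import sqlglot
    #   q = "SELECT a IS DISTINCT FROM b FROM t"
    #   node = sqlglot.parse_one(q).find(sqlglot.exp.NullSafeNEQ)
    #   # the negated form, IS NOT DISTINCT FROM, yields exp.NullSafeEQ instead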

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this
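
    # Illustrative sketch (not part of the upstream source): interval literals are
    # normalized into the canonical string-plus-unit shape described above.
    #
    #   import sqlglot
    #   iv = sqlglot.parse_one("SELECT INTERVAL '5 day'").find(sqlglot.exp.Interval)
    #   # iv.this is the string literal '5' and iv.unit is the Var DAY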

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
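
    # Illustrative sketch (not part of the upstream source): Div nodes carry the
    # dialect's division semantics so generators can transpile them faithfully.
    #
    #   import sqlglot
    #   div = sqlglot.parse_one("SELECT a / b FROM t").find(sqlglot.exp.Div)
    #   # div.args["typed"] and div.args["safe"] mirror TYPED_DIVISION / SAFE_DIVISION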

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
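
    # Illustrative sketch (not part of the upstream source): a type keyword followed
    # by a literal is canonicalized into a cast by this method.
    #
    #   import sqlglot
    #   cast = sqlglot.parse_one("SELECT DATE '2024-01-01'").find(sqlglot.exp.Cast)
    #   # cast.to is the DATE DataType; cast.sql() == "CAST('2024-01-01' AS DATE)"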

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
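
    # Illustrative sketch (not part of the upstream source): nested types recurse into
    # exp.DataType trees; exp.DataType.build exposes the same machinery directly.
    #
    #   import sqlglot
    #   dt = sqlglot.exp.DataType.build("ARRAY<INT>")
    #   # dt.this is exp.DataType.Type.ARRAY and dt.expressions[0] is the INT DataType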

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this
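
    # Illustrative sketch (not part of the upstream source): AT TIME ZONE binds at the
    # unary level, wrapping its operand in exp.AtTimeZone.
    #
    #   import sqlglot
    #   q = "SELECT ts AT TIME ZONE 'UTC' FROM t"
    #   node = sqlglot.parse_one(q).find(sqlglot.exp.AtTimeZone)
    #   # node.args["zone"] is the 'UTC' string literal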
col:"a'b") as 5247 # it'll roundtrip to a string literal in GET_PATH 5248 if isinstance(path, exp.Identifier) and path.quoted: 5249 escape = True 5250 5251 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5252 5253 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5254 # Databricks transforms it back to the colon/dot notation 5255 if json_path: 5256 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5257 5258 if json_path_expr: 5259 json_path_expr.set("escape", escape) 5260 5261 this = self.expression( 5262 exp.JSONExtract, 5263 this=this, 5264 expression=json_path_expr, 5265 variant_extract=True, 5266 ) 5267 5268 while casts: 5269 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5270 5271 return this 5272 5273 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5274 return self._parse_types() 5275 5276 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5277 this = self._parse_bracket(this) 5278 5279 while self._match_set(self.COLUMN_OPERATORS): 5280 op_token = self._prev.token_type 5281 op = self.COLUMN_OPERATORS.get(op_token) 5282 5283 if op_token == TokenType.DCOLON: 5284 field = self._parse_dcolon() 5285 if not field: 5286 self.raise_error("Expected type") 5287 elif op and self._curr: 5288 field = self._parse_column_reference() or self._parse_bracket() 5289 else: 5290 field = self._parse_field(any_token=True, anonymous_func=True) 5291 5292 if isinstance(field, (exp.Func, exp.Window)) and this: 5293 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5294 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5295 this = exp.replace_tree( 5296 this, 5297 lambda n: ( 5298 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5299 if n.table 5300 else n.this 5301 ) 5302 if isinstance(n, exp.Column) 5303 else n, 5304 ) 5305 5306 if op: 5307 this = op(self, this, field) 5308 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5309 this = self.expression( 5310 exp.Column, 5311 comments=this.comments, 5312 this=field, 5313 table=this.this, 5314 db=this.args.get("table"), 5315 catalog=this.args.get("db"), 5316 ) 5317 elif isinstance(field, exp.Window): 5318 # Move the exp.Dot's to the window's function 5319 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5320 field.set("this", window_func) 5321 this = field 5322 else: 5323 this = self.expression(exp.Dot, this=this, expression=field) 5324 5325 if field and field.comments: 5326 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5327 5328 this = self._parse_bracket(this) 5329 5330 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5331 5332 def _parse_primary(self) -> t.Optional[exp.Expression]: 5333 if self._match_set(self.PRIMARY_PARSERS): 5334 token_type = self._prev.token_type 5335 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5336 5337 if token_type == TokenType.STRING: 5338 expressions = [primary] 5339 while self._match(TokenType.STRING): 5340 expressions.append(exp.Literal.string(self._prev.text)) 5341 5342 if len(expressions) > 1: 5343 return self.expression(exp.Concat, expressions=expressions) 5344 5345 return primary 5346 5347 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5348 return exp.Literal.number(f"0.{self._prev.text}") 5349 5350 if 
self._match(TokenType.L_PAREN): 5351 comments = self._prev_comments 5352 query = self._parse_select() 5353 5354 if query: 5355 expressions = [query] 5356 else: 5357 expressions = self._parse_expressions() 5358 5359 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5360 5361 if not this and self._match(TokenType.R_PAREN, advance=False): 5362 this = self.expression(exp.Tuple) 5363 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5364 this = self._parse_subquery(this=this, parse_alias=False) 5365 elif isinstance(this, exp.Subquery): 5366 this = self._parse_subquery( 5367 this=self._parse_set_operations(this), parse_alias=False 5368 ) 5369 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5370 this = self.expression(exp.Tuple, expressions=expressions) 5371 else: 5372 this = self.expression(exp.Paren, this=this) 5373 5374 if this: 5375 this.add_comments(comments) 5376 5377 self._match_r_paren(expression=this) 5378 return this 5379 5380 return None 5381 5382 def _parse_field( 5383 self, 5384 any_token: bool = False, 5385 tokens: t.Optional[t.Collection[TokenType]] = None, 5386 anonymous_func: bool = False, 5387 ) -> t.Optional[exp.Expression]: 5388 if anonymous_func: 5389 field = ( 5390 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5391 or self._parse_primary() 5392 ) 5393 else: 5394 field = self._parse_primary() or self._parse_function( 5395 anonymous=anonymous_func, any_token=any_token 5396 ) 5397 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5398 5399 def _parse_function( 5400 self, 5401 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5402 anonymous: bool = False, 5403 optional_parens: bool = True, 5404 any_token: bool = False, 5405 ) -> t.Optional[exp.Expression]: 5406 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5407 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5408 fn_syntax = False 5409 if ( 5410 self._match(TokenType.L_BRACE, advance=False) 5411 and self._next 5412 and self._next.text.upper() == "FN" 5413 ): 5414 self._advance(2) 5415 fn_syntax = True 5416 5417 func = self._parse_function_call( 5418 functions=functions, 5419 anonymous=anonymous, 5420 optional_parens=optional_parens, 5421 any_token=any_token, 5422 ) 5423 5424 if fn_syntax: 5425 self._match(TokenType.R_BRACE) 5426 5427 return func 5428 5429 def _parse_function_call( 5430 self, 5431 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5432 anonymous: bool = False, 5433 optional_parens: bool = True, 5434 any_token: bool = False, 5435 ) -> t.Optional[exp.Expression]: 5436 if not self._curr: 5437 return None 5438 5439 comments = self._curr.comments 5440 token_type = self._curr.token_type 5441 this = self._curr.text 5442 upper = this.upper() 5443 5444 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5445 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5446 self._advance() 5447 return self._parse_window(parser(self)) 5448 5449 if not self._next or self._next.token_type != TokenType.L_PAREN: 5450 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5451 self._advance() 5452 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5453 5454 return None 5455 5456 if any_token: 5457 if token_type in self.RESERVED_TOKENS: 5458 return None 5459 elif token_type not in self.FUNC_TOKENS: 5460 return None 5461 5462 self._advance(2) 5463 5464 parser = self.FUNCTION_PARSERS.get(upper) 5465 if parser and not anonymous: 5466 this = parser(self) 
5467 else: 5468 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5469 5470 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5471 this = self.expression( 5472 subquery_predicate, comments=comments, this=self._parse_select() 5473 ) 5474 self._match_r_paren() 5475 return this 5476 5477 if functions is None: 5478 functions = self.FUNCTIONS 5479 5480 function = functions.get(upper) 5481 known_function = function and not anonymous 5482 5483 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5484 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5485 5486 post_func_comments = self._curr and self._curr.comments 5487 if known_function and post_func_comments: 5488 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5489 # call we'll construct it as exp.Anonymous, even if it's "known" 5490 if any( 5491 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5492 for comment in post_func_comments 5493 ): 5494 known_function = False 5495 5496 if alias and known_function: 5497 args = self._kv_to_prop_eq(args) 5498 5499 if known_function: 5500 func_builder = t.cast(t.Callable, function) 5501 5502 if "dialect" in func_builder.__code__.co_varnames: 5503 func = func_builder(args, dialect=self.dialect) 5504 else: 5505 func = func_builder(args) 5506 5507 func = self.validate_expression(func, args) 5508 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5509 func.meta["name"] = this 5510 5511 this = func 5512 else: 5513 if token_type == TokenType.IDENTIFIER: 5514 this = exp.Identifier(this=this, quoted=True) 5515 this = self.expression(exp.Anonymous, this=this, expressions=args) 5516 5517 if isinstance(this, exp.Expression): 5518 this.add_comments(comments) 5519 5520 self._match_r_paren(this) 5521 return self._parse_window(this) 5522 5523 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5524 return expression 5525 5526 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5527 transformed = [] 5528 5529 for index, e in enumerate(expressions): 5530 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5531 if isinstance(e, exp.Alias): 5532 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5533 5534 if not isinstance(e, exp.PropertyEQ): 5535 e = self.expression( 5536 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5537 ) 5538 5539 if isinstance(e.this, exp.Column): 5540 e.this.replace(e.this.this) 5541 else: 5542 e = self._to_prop_eq(e, index) 5543 5544 transformed.append(e) 5545 5546 return transformed 5547 5548 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5549 return self._parse_statement() 5550 5551 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5552 return self._parse_column_def(self._parse_id_var()) 5553 5554 def _parse_user_defined_function( 5555 self, kind: t.Optional[TokenType] = None 5556 ) -> t.Optional[exp.Expression]: 5557 this = self._parse_id_var() 5558 5559 while self._match(TokenType.DOT): 5560 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5561 5562 if not self._match(TokenType.L_PAREN): 5563 return this 5564 5565 expressions = self._parse_csv(self._parse_function_parameter) 5566 self._match_r_paren() 5567 return self.expression( 5568 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5569 ) 5570 5571 def _parse_introducer(self, token: Token) -> exp.Introducer | 
exp.Identifier: 5572 literal = self._parse_primary() 5573 if literal: 5574 return self.expression(exp.Introducer, this=token.text, expression=literal) 5575 5576 return self.expression(exp.Identifier, this=token.text) 5577 5578 def _parse_session_parameter(self) -> exp.SessionParameter: 5579 kind = None 5580 this = self._parse_id_var() or self._parse_primary() 5581 5582 if this and self._match(TokenType.DOT): 5583 kind = this.name 5584 this = self._parse_var() or self._parse_primary() 5585 5586 return self.expression(exp.SessionParameter, this=this, kind=kind) 5587 5588 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5589 return self._parse_id_var() 5590 5591 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5592 index = self._index 5593 5594 if self._match(TokenType.L_PAREN): 5595 expressions = t.cast( 5596 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5597 ) 5598 5599 if not self._match(TokenType.R_PAREN): 5600 self._retreat(index) 5601 else: 5602 expressions = [self._parse_lambda_arg()] 5603 5604 if self._match_set(self.LAMBDAS): 5605 return self.LAMBDAS[self._prev.token_type](self, expressions) 5606 5607 self._retreat(index) 5608 5609 this: t.Optional[exp.Expression] 5610 5611 if self._match(TokenType.DISTINCT): 5612 this = self.expression( 5613 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5614 ) 5615 else: 5616 this = self._parse_select_or_expression(alias=alias) 5617 5618 return self._parse_limit( 5619 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5620 ) 5621 5622 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5623 index = self._index 5624 if not self._match(TokenType.L_PAREN): 5625 return this 5626 5627 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5628 # expr can be of both types 5629 if self._match_set(self.SELECT_START_TOKENS): 5630 self._retreat(index) 5631 return this 5632 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5633 self._match_r_paren() 5634 return self.expression(exp.Schema, this=this, expressions=args) 5635 5636 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5637 return self._parse_column_def(self._parse_field(any_token=True)) 5638 5639 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5640 # column defs are not really columns, they're identifiers 5641 if isinstance(this, exp.Column): 5642 this = this.this 5643 5644 kind = self._parse_types(schema=True) 5645 5646 if self._match_text_seq("FOR", "ORDINALITY"): 5647 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5648 5649 constraints: t.List[exp.Expression] = [] 5650 5651 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5652 ("ALIAS", "MATERIALIZED") 5653 ): 5654 persisted = self._prev.text.upper() == "MATERIALIZED" 5655 constraint_kind = exp.ComputedColumnConstraint( 5656 this=self._parse_assignment(), 5657 persisted=persisted or self._match_text_seq("PERSISTED"), 5658 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5659 ) 5660 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5661 elif ( 5662 kind 5663 and self._match(TokenType.ALIAS, advance=False) 5664 and ( 5665 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5666 or (self._next and self._next.token_type == TokenType.L_PAREN) 5667 ) 5668 ): 5669 self._advance() 5670 constraints.append( 5671 self.expression( 5672 exp.ColumnConstraint, 5673 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5674 ) 5675 ) 5676 5677 while True: 5678 constraint = self._parse_column_constraint() 5679 if not constraint: 5680 break 5681 constraints.append(constraint) 5682 5683 if not kind and not constraints: 5684 return this 5685 5686 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5687 5688 def _parse_auto_increment( 5689 self, 5690 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5691 start = None 5692 increment = None 5693 5694 if self._match(TokenType.L_PAREN, advance=False): 5695 args = self._parse_wrapped_csv(self._parse_bitwise) 5696 start = seq_get(args, 0) 5697 increment = seq_get(args, 1) 5698 elif self._match_text_seq("START"): 5699 start = self._parse_bitwise() 5700 self._match_text_seq("INCREMENT") 5701 increment = self._parse_bitwise() 5702 5703 if start and increment: 5704 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5705 5706 return exp.AutoIncrementColumnConstraint() 5707 5708 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5709 if not self._match_text_seq("REFRESH"): 5710 self._retreat(self._index - 1) 5711 return None 5712 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5713 5714 def _parse_compress(self) -> exp.CompressColumnConstraint: 5715 if self._match(TokenType.L_PAREN, advance=False): 5716 return self.expression( 5717 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5718 ) 5719 5720 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5721 5722 def _parse_generated_as_identity( 5723 self, 5724 ) -> ( 5725 exp.GeneratedAsIdentityColumnConstraint 5726 | exp.ComputedColumnConstraint 5727 
| exp.GeneratedAsRowColumnConstraint 5728 ): 5729 if self._match_text_seq("BY", "DEFAULT"): 5730 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5731 this = self.expression( 5732 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5733 ) 5734 else: 5735 self._match_text_seq("ALWAYS") 5736 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5737 5738 self._match(TokenType.ALIAS) 5739 5740 if self._match_text_seq("ROW"): 5741 start = self._match_text_seq("START") 5742 if not start: 5743 self._match(TokenType.END) 5744 hidden = self._match_text_seq("HIDDEN") 5745 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5746 5747 identity = self._match_text_seq("IDENTITY") 5748 5749 if self._match(TokenType.L_PAREN): 5750 if self._match(TokenType.START_WITH): 5751 this.set("start", self._parse_bitwise()) 5752 if self._match_text_seq("INCREMENT", "BY"): 5753 this.set("increment", self._parse_bitwise()) 5754 if self._match_text_seq("MINVALUE"): 5755 this.set("minvalue", self._parse_bitwise()) 5756 if self._match_text_seq("MAXVALUE"): 5757 this.set("maxvalue", self._parse_bitwise()) 5758 5759 if self._match_text_seq("CYCLE"): 5760 this.set("cycle", True) 5761 elif self._match_text_seq("NO", "CYCLE"): 5762 this.set("cycle", False) 5763 5764 if not identity: 5765 this.set("expression", self._parse_range()) 5766 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5767 args = self._parse_csv(self._parse_bitwise) 5768 this.set("start", seq_get(args, 0)) 5769 this.set("increment", seq_get(args, 1)) 5770 5771 self._match_r_paren() 5772 5773 return this 5774 5775 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5776 self._match_text_seq("LENGTH") 5777 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5778 5779 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5780 if self._match_text_seq("NULL"): 5781 return self.expression(exp.NotNullColumnConstraint) 5782 if self._match_text_seq("CASESPECIFIC"): 5783 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5784 if self._match_text_seq("FOR", "REPLICATION"): 5785 return self.expression(exp.NotForReplicationColumnConstraint) 5786 5787 # Unconsume the `NOT` token 5788 self._retreat(self._index - 1) 5789 return None 5790 5791 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5792 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5793 5794 procedure_option_follows = ( 5795 self._match(TokenType.WITH, advance=False) 5796 and self._next 5797 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5798 ) 5799 5800 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5801 return self.expression( 5802 exp.ColumnConstraint, 5803 this=this, 5804 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5805 ) 5806 5807 return this 5808 5809 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5810 if not self._match(TokenType.CONSTRAINT): 5811 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5812 5813 return self.expression( 5814 exp.Constraint, 5815 this=self._parse_id_var(), 5816 expressions=self._parse_unnamed_constraints(), 5817 ) 5818 5819 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5820 constraints = [] 5821 while True: 5822 constraint = self._parse_unnamed_constraint() or self._parse_function() 5823 if not constraint: 5824 break 5825 
constraints.append(constraint) 5826 5827 return constraints 5828 5829 def _parse_unnamed_constraint( 5830 self, constraints: t.Optional[t.Collection[str]] = None 5831 ) -> t.Optional[exp.Expression]: 5832 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5833 constraints or self.CONSTRAINT_PARSERS 5834 ): 5835 return None 5836 5837 constraint = self._prev.text.upper() 5838 if constraint not in self.CONSTRAINT_PARSERS: 5839 self.raise_error(f"No parser found for schema constraint {constraint}.") 5840 5841 return self.CONSTRAINT_PARSERS[constraint](self) 5842 5843 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5844 return self._parse_id_var(any_token=False) 5845 5846 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5847 self._match_text_seq("KEY") 5848 return self.expression( 5849 exp.UniqueColumnConstraint, 5850 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5851 this=self._parse_schema(self._parse_unique_key()), 5852 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5853 on_conflict=self._parse_on_conflict(), 5854 ) 5855 5856 def _parse_key_constraint_options(self) -> t.List[str]: 5857 options = [] 5858 while True: 5859 if not self._curr: 5860 break 5861 5862 if self._match(TokenType.ON): 5863 action = None 5864 on = self._advance_any() and self._prev.text 5865 5866 if self._match_text_seq("NO", "ACTION"): 5867 action = "NO ACTION" 5868 elif self._match_text_seq("CASCADE"): 5869 action = "CASCADE" 5870 elif self._match_text_seq("RESTRICT"): 5871 action = "RESTRICT" 5872 elif self._match_pair(TokenType.SET, TokenType.NULL): 5873 action = "SET NULL" 5874 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5875 action = "SET DEFAULT" 5876 else: 5877 self.raise_error("Invalid key constraint") 5878 5879 options.append(f"ON {on} {action}") 5880 else: 5881 var = self._parse_var_from_options( 5882 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5883 ) 5884 if not var: 5885 break 5886 options.append(var.name) 5887 5888 return options 5889 5890 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5891 if match and not self._match(TokenType.REFERENCES): 5892 return None 5893 5894 expressions = None 5895 this = self._parse_table(schema=True) 5896 options = self._parse_key_constraint_options() 5897 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5898 5899 def _parse_foreign_key(self) -> exp.ForeignKey: 5900 expressions = self._parse_wrapped_id_vars() 5901 reference = self._parse_references() 5902 options = {} 5903 5904 while self._match(TokenType.ON): 5905 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5906 self.raise_error("Expected DELETE or UPDATE") 5907 5908 kind = self._prev.text.lower() 5909 5910 if self._match_text_seq("NO", "ACTION"): 5911 action = "NO ACTION" 5912 elif self._match(TokenType.SET): 5913 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5914 action = "SET " + self._prev.text.upper() 5915 else: 5916 self._advance() 5917 action = self._prev.text.upper() 5918 5919 options[kind] = action 5920 5921 return self.expression( 5922 exp.ForeignKey, 5923 expressions=expressions, 5924 reference=reference, 5925 **options, # type: ignore 5926 ) 5927 5928 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5929 return self._parse_ordered() or self._parse_field() 5930 5931 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5932 if not 
self._match(TokenType.TIMESTAMP_SNAPSHOT): 5933 self._retreat(self._index - 1) 5934 return None 5935 5936 id_vars = self._parse_wrapped_id_vars() 5937 return self.expression( 5938 exp.PeriodForSystemTimeConstraint, 5939 this=seq_get(id_vars, 0), 5940 expression=seq_get(id_vars, 1), 5941 ) 5942 5943 def _parse_primary_key( 5944 self, wrapped_optional: bool = False, in_props: bool = False 5945 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5946 desc = ( 5947 self._match_set((TokenType.ASC, TokenType.DESC)) 5948 and self._prev.token_type == TokenType.DESC 5949 ) 5950 5951 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5952 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5953 5954 expressions = self._parse_wrapped_csv( 5955 self._parse_primary_key_part, optional=wrapped_optional 5956 ) 5957 options = self._parse_key_constraint_options() 5958 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5959 5960 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5961 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5962 5963 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5964 """ 5965 Parses a datetime column in ODBC format. We parse the column into the corresponding 5966 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5967 same as we did for `DATE('yyyy-mm-dd')`. 5968 5969 Reference: 5970 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5971 """ 5972 self._match(TokenType.VAR) 5973 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5974 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5975 if not self._match(TokenType.R_BRACE): 5976 self.raise_error("Expected }") 5977 return expression 5978 5979 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5980 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5981 return this 5982 5983 bracket_kind = self._prev.token_type 5984 if ( 5985 bracket_kind == TokenType.L_BRACE 5986 and self._curr 5987 and self._curr.token_type == TokenType.VAR 5988 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5989 ): 5990 return self._parse_odbc_datetime_literal() 5991 5992 expressions = self._parse_csv( 5993 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5994 ) 5995 5996 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5997 self.raise_error("Expected ]") 5998 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5999 self.raise_error("Expected }") 6000 6001 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6002 if bracket_kind == TokenType.L_BRACE: 6003 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6004 elif not this: 6005 this = build_array_constructor( 6006 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6007 ) 6008 else: 6009 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6010 if constructor_type: 6011 return build_array_constructor( 6012 constructor_type, 6013 args=expressions, 6014 bracket_kind=bracket_kind, 6015 dialect=self.dialect, 6016 ) 6017 6018 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 6019 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6020 6021 
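# Illustrative shapes (not exhaustive): a DuckDB-style literal {'a': 1} was routed
# through _kv_to_prop_eq above into an exp.Struct, while col[1] becomes
# exp.Bracket(this=col, expressions=[1]) once apply_index_offset has adjusted the
# subscript for the dialect's INDEX_OFFSET; the tail call below lets chained
# subscripts such as col[1][2] nest recursively.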
self._add_comments(this) 6022 return self._parse_bracket(this) 6023 6024 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6025 if self._match(TokenType.COLON): 6026 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6027 return this 6028 6029 def _parse_case(self) -> t.Optional[exp.Expression]: 6030 ifs = [] 6031 default = None 6032 6033 comments = self._prev_comments 6034 expression = self._parse_assignment() 6035 6036 while self._match(TokenType.WHEN): 6037 this = self._parse_assignment() 6038 self._match(TokenType.THEN) 6039 then = self._parse_assignment() 6040 ifs.append(self.expression(exp.If, this=this, true=then)) 6041 6042 if self._match(TokenType.ELSE): 6043 default = self._parse_assignment() 6044 6045 if not self._match(TokenType.END): 6046 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6047 default = exp.column("interval") 6048 else: 6049 self.raise_error("Expected END after CASE", self._prev) 6050 6051 return self.expression( 6052 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6053 ) 6054 6055 def _parse_if(self) -> t.Optional[exp.Expression]: 6056 if self._match(TokenType.L_PAREN): 6057 args = self._parse_csv(self._parse_assignment) 6058 this = self.validate_expression(exp.If.from_arg_list(args), args) 6059 self._match_r_paren() 6060 else: 6061 index = self._index - 1 6062 6063 if self.NO_PAREN_IF_COMMANDS and index == 0: 6064 return self._parse_as_command(self._prev) 6065 6066 condition = self._parse_assignment() 6067 6068 if not condition: 6069 self._retreat(index) 6070 return None 6071 6072 self._match(TokenType.THEN) 6073 true = self._parse_assignment() 6074 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6075 self._match(TokenType.END) 6076 this = self.expression(exp.If, this=condition, true=true, false=false) 6077 6078 return this 6079 6080 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6081 if not self._match_text_seq("VALUE", "FOR"): 6082 self._retreat(self._index - 1) 6083 return None 6084 6085 return self.expression( 6086 exp.NextValueFor, 6087 this=self._parse_column(), 6088 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6089 ) 6090 6091 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6092 this = self._parse_function() or self._parse_var_or_string(upper=True) 6093 6094 if self._match(TokenType.FROM): 6095 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6096 6097 if not self._match(TokenType.COMMA): 6098 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6099 6100 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6101 6102 def _parse_gap_fill(self) -> exp.GapFill: 6103 self._match(TokenType.TABLE) 6104 this = self._parse_table() 6105 6106 self._match(TokenType.COMMA) 6107 args = [this, *self._parse_csv(self._parse_lambda)] 6108 6109 gap_fill = exp.GapFill.from_arg_list(args) 6110 return self.validate_expression(gap_fill, args) 6111 6112 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6113 this = self._parse_assignment() 6114 6115 if not self._match(TokenType.ALIAS): 6116 if self._match(TokenType.COMMA): 6117 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6118 6119 self.raise_error("Expected AS after CAST") 6120 6121 fmt = None 6122 to = self._parse_types() 6123 6124 default = self._match(TokenType.DEFAULT) 6125 if 
default: 6126 default = self._parse_bitwise() 6127 self._match_text_seq("ON", "CONVERSION", "ERROR") 6128 6129 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6130 fmt_string = self._parse_string() 6131 fmt = self._parse_at_time_zone(fmt_string) 6132 6133 if not to: 6134 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6135 if to.this in exp.DataType.TEMPORAL_TYPES: 6136 this = self.expression( 6137 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6138 this=this, 6139 format=exp.Literal.string( 6140 format_time( 6141 fmt_string.this if fmt_string else "", 6142 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6143 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6144 ) 6145 ), 6146 safe=safe, 6147 ) 6148 6149 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6150 this.set("zone", fmt.args["zone"]) 6151 return this 6152 elif not to: 6153 self.raise_error("Expected TYPE after CAST") 6154 elif isinstance(to, exp.Identifier): 6155 to = exp.DataType.build(to.name, udt=True) 6156 elif to.this == exp.DataType.Type.CHAR: 6157 if self._match(TokenType.CHARACTER_SET): 6158 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6159 6160 return self.expression( 6161 exp.Cast if strict else exp.TryCast, 6162 this=this, 6163 to=to, 6164 format=fmt, 6165 safe=safe, 6166 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6167 default=default, 6168 ) 6169 6170 def _parse_string_agg(self) -> exp.GroupConcat: 6171 if self._match(TokenType.DISTINCT): 6172 args: t.List[t.Optional[exp.Expression]] = [ 6173 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6174 ] 6175 if self._match(TokenType.COMMA): 6176 args.extend(self._parse_csv(self._parse_assignment)) 6177 else: 6178 args = self._parse_csv(self._parse_assignment) # type: ignore 6179 6180 if self._match_text_seq("ON", "OVERFLOW"): 6181 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6182 if self._match_text_seq("ERROR"): 6183 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6184 else: 6185 self._match_text_seq("TRUNCATE") 6186 on_overflow = self.expression( 6187 exp.OverflowTruncateBehavior, 6188 this=self._parse_string(), 6189 with_count=( 6190 self._match_text_seq("WITH", "COUNT") 6191 or not self._match_text_seq("WITHOUT", "COUNT") 6192 ), 6193 ) 6194 else: 6195 on_overflow = None 6196 6197 index = self._index 6198 if not self._match(TokenType.R_PAREN) and args: 6199 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6200 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6201 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6202 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6203 6204 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6205 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6206 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
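# For example, Trino's LISTAGG(x, ',') WITHIN GROUP (ORDER BY x) is captured below as
# exp.GroupConcat with the ORDER BY folded into `this`, mirroring the shape of MySQL's
# GROUP_CONCAT(x ORDER BY x SEPARATOR ',') and thus simplifying transpilation.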
6207 if not self._match_text_seq("WITHIN", "GROUP"): 6208 self._retreat(index) 6209 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6210 6211 # The corresponding match_r_paren will be called in parse_function (caller) 6212 self._match_l_paren() 6213 6214 return self.expression( 6215 exp.GroupConcat, 6216 this=self._parse_order(this=seq_get(args, 0)), 6217 separator=seq_get(args, 1), 6218 on_overflow=on_overflow, 6219 ) 6220 6221 def _parse_convert( 6222 self, strict: bool, safe: t.Optional[bool] = None 6223 ) -> t.Optional[exp.Expression]: 6224 this = self._parse_bitwise() 6225 6226 if self._match(TokenType.USING): 6227 to: t.Optional[exp.Expression] = self.expression( 6228 exp.CharacterSet, this=self._parse_var() 6229 ) 6230 elif self._match(TokenType.COMMA): 6231 to = self._parse_types() 6232 else: 6233 to = None 6234 6235 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6236 6237 def _parse_xml_table(self) -> exp.XMLTable: 6238 namespaces = None 6239 passing = None 6240 columns = None 6241 6242 if self._match_text_seq("XMLNAMESPACES", "("): 6243 namespaces = self._parse_xml_namespace() 6244 self._match_text_seq(")", ",") 6245 6246 this = self._parse_string() 6247 6248 if self._match_text_seq("PASSING"): 6249 # The BY VALUE keywords are optional and are provided for semantic clarity 6250 self._match_text_seq("BY", "VALUE") 6251 passing = self._parse_csv(self._parse_column) 6252 6253 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6254 6255 if self._match_text_seq("COLUMNS"): 6256 columns = self._parse_csv(self._parse_field_def) 6257 6258 return self.expression( 6259 exp.XMLTable, 6260 this=this, 6261 namespaces=namespaces, 6262 passing=passing, 6263 columns=columns, 6264 by_ref=by_ref, 6265 ) 6266 6267 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6268 namespaces = [] 6269 6270 while True: 6271 if self._match(TokenType.DEFAULT): 6272 uri = self._parse_string() 6273 else: 6274 uri = self._parse_alias(self._parse_string()) 6275 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6276 if not self._match(TokenType.COMMA): 6277 break 6278 6279 return namespaces 6280 6281 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6282 """ 6283 There are generally two variants of the DECODE function: 6284 6285 - DECODE(bin, charset) 6286 - DECODE(expression, search, result [, search, result] ... [, default]) 6287 6288 The second variant will always be parsed into a CASE expression. Note that NULL 6289 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6290 instead of relying on pattern matching. 
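For example, DECODE(x, 1, 'one', 2, 'two', 'other') is parsed into the equivalent of
CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END, whereas a NULL
search such as DECODE(x, NULL, 'missing') produces WHEN x IS NULL THEN 'missing'
rather than an equality comparison.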
6291 """ 6292 args = self._parse_csv(self._parse_assignment) 6293 6294 if len(args) < 3: 6295 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6296 6297 expression, *expressions = args 6298 if not expression: 6299 return None 6300 6301 ifs = [] 6302 for search, result in zip(expressions[::2], expressions[1::2]): 6303 if not search or not result: 6304 return None 6305 6306 if isinstance(search, exp.Literal): 6307 ifs.append( 6308 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6309 ) 6310 elif isinstance(search, exp.Null): 6311 ifs.append( 6312 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6313 ) 6314 else: 6315 cond = exp.or_( 6316 exp.EQ(this=expression.copy(), expression=search), 6317 exp.and_( 6318 exp.Is(this=expression.copy(), expression=exp.Null()), 6319 exp.Is(this=search.copy(), expression=exp.Null()), 6320 copy=False, 6321 ), 6322 copy=False, 6323 ) 6324 ifs.append(exp.If(this=cond, true=result)) 6325 6326 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6327 6328 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6329 self._match_text_seq("KEY") 6330 key = self._parse_column() 6331 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6332 self._match_text_seq("VALUE") 6333 value = self._parse_bitwise() 6334 6335 if not key and not value: 6336 return None 6337 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6338 6339 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6340 if not this or not self._match_text_seq("FORMAT", "JSON"): 6341 return this 6342 6343 return self.expression(exp.FormatJson, this=this) 6344 6345 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6346 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6347 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6348 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6349 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6350 else: 6351 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6352 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6353 6354 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6355 6356 if not empty and not error and not null: 6357 return None 6358 6359 return self.expression( 6360 exp.OnCondition, 6361 empty=empty, 6362 error=error, 6363 null=null, 6364 ) 6365 6366 def _parse_on_handling( 6367 self, on: str, *values: str 6368 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6369 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6370 for value in values: 6371 if self._match_text_seq(value, "ON", on): 6372 return f"{value} ON {on}" 6373 6374 index = self._index 6375 if self._match(TokenType.DEFAULT): 6376 default_value = self._parse_bitwise() 6377 if self._match_text_seq("ON", on): 6378 return default_value 6379 6380 self._retreat(index) 6381 6382 return None 6383 6384 @t.overload 6385 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6386 6387 @t.overload 6388 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6389 6390 def _parse_json_object(self, agg=False): 6391 star = self._parse_star() 6392 expressions = ( 6393 [star] 6394 if star 6395 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6396 ) 6397 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6398 6399 unique_keys = None 6400 if self._match_text_seq("WITH", "UNIQUE"): 6401 unique_keys = True 6402 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6403 unique_keys = False 6404 6405 self._match_text_seq("KEYS") 6406 6407 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6408 self._parse_type() 6409 ) 6410 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6411 6412 return self.expression( 6413 exp.JSONObjectAgg if agg else exp.JSONObject, 6414 expressions=expressions, 6415 null_handling=null_handling, 6416 unique_keys=unique_keys, 6417 return_type=return_type, 6418 encoding=encoding, 6419 ) 6420 6421 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6422 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6423 if not self._match_text_seq("NESTED"): 6424 this = self._parse_id_var() 6425 kind = self._parse_types(allow_identifiers=False) 6426 nested = None 6427 else: 6428 this = None 6429 kind = None 6430 nested = True 6431 6432 path = self._match_text_seq("PATH") and self._parse_string() 6433 nested_schema = nested and self._parse_json_schema() 6434 6435 return self.expression( 6436 exp.JSONColumnDef, 6437 this=this, 6438 kind=kind, 6439 path=path, 6440 nested_schema=nested_schema, 6441 ) 6442 6443 def _parse_json_schema(self) -> exp.JSONSchema: 6444 self._match_text_seq("COLUMNS") 6445 return self.expression( 6446 exp.JSONSchema, 6447 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6448 ) 6449 6450 def _parse_json_table(self) -> exp.JSONTable: 6451 this = self._parse_format_json(self._parse_bitwise()) 6452 path = self._match(TokenType.COMMA) and self._parse_string() 6453 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6454 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6455 schema = self._parse_json_schema() 6456 6457 return exp.JSONTable( 6458 this=this, 6459 schema=schema, 6460 path=path, 6461 error_handling=error_handling, 6462 empty_handling=empty_handling, 6463 ) 6464 6465 def _parse_match_against(self) -> exp.MatchAgainst: 6466 expressions = self._parse_csv(self._parse_column) 6467 6468 self._match_text_seq(")", "AGAINST", "(") 6469 6470 this = self._parse_string() 6471 6472 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6473 modifier = "IN NATURAL LANGUAGE MODE" 6474 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6475 modifier = f"{modifier} WITH QUERY EXPANSION" 6476 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6477 modifier = "IN BOOLEAN MODE" 6478 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6479 modifier = "WITH QUERY EXPANSION" 6480 else: 6481 modifier = None 6482 6483 return self.expression( 6484 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6485 ) 6486 6487 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6488 def _parse_open_json(self) -> exp.OpenJSON: 6489 this = self._parse_bitwise() 6490 path = self._match(TokenType.COMMA) and self._parse_string() 6491 6492 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6493 this = self._parse_field(any_token=True) 6494 kind = self._parse_types() 6495 path = 
self._parse_string() 6496 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6497 6498 return self.expression( 6499 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6500 ) 6501 6502 expressions = None 6503 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6504 self._match_l_paren() 6505 expressions = self._parse_csv(_parse_open_json_column_def) 6506 6507 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6508 6509 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6510 args = self._parse_csv(self._parse_bitwise) 6511 6512 if self._match(TokenType.IN): 6513 return self.expression( 6514 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6515 ) 6516 6517 if haystack_first: 6518 haystack = seq_get(args, 0) 6519 needle = seq_get(args, 1) 6520 else: 6521 haystack = seq_get(args, 1) 6522 needle = seq_get(args, 0) 6523 6524 return self.expression( 6525 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6526 ) 6527 6528 def _parse_predict(self) -> exp.Predict: 6529 self._match_text_seq("MODEL") 6530 this = self._parse_table() 6531 6532 self._match(TokenType.COMMA) 6533 self._match_text_seq("TABLE") 6534 6535 return self.expression( 6536 exp.Predict, 6537 this=this, 6538 expression=self._parse_table(), 6539 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6540 ) 6541 6542 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6543 args = self._parse_csv(self._parse_table) 6544 return exp.JoinHint(this=func_name.upper(), expressions=args) 6545 6546 def _parse_substring(self) -> exp.Substring: 6547 # Postgres supports the form: substring(string [from int] [for int]) 6548 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6549 6550 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6551 6552 if self._match(TokenType.FROM): 6553 args.append(self._parse_bitwise()) 6554 if self._match(TokenType.FOR): 6555 if len(args) == 1: 6556 args.append(exp.Literal.number(1)) 6557 args.append(self._parse_bitwise()) 6558 6559 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6560 6561 def _parse_trim(self) -> exp.Trim: 6562 # https://www.w3resource.com/sql/character-functions/trim.php 6563 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6564 6565 position = None 6566 collation = None 6567 expression = None 6568 6569 if self._match_texts(self.TRIM_TYPES): 6570 position = self._prev.text.upper() 6571 6572 this = self._parse_bitwise() 6573 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6574 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6575 expression = self._parse_bitwise() 6576 6577 if invert_order: 6578 this, expression = expression, this 6579 6580 if self._match(TokenType.COLLATE): 6581 collation = self._parse_bitwise() 6582 6583 return self.expression( 6584 exp.Trim, this=this, position=position, expression=expression, collation=collation 6585 ) 6586 6587 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6588 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6589 6590 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6591 return self._parse_window(self._parse_id_var(), alias=True) 6592 6593 def _parse_respect_or_ignore_nulls( 6594 self, this: t.Optional[exp.Expression] 6595 ) -> t.Optional[exp.Expression]: 6596 if self._match_text_seq("IGNORE", "NULLS"): 
6597 return self.expression(exp.IgnoreNulls, this=this) 6598 if self._match_text_seq("RESPECT", "NULLS"): 6599 return self.expression(exp.RespectNulls, this=this) 6600 return this 6601 6602 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6603 if self._match(TokenType.HAVING): 6604 self._match_texts(("MAX", "MIN")) 6605 max = self._prev.text.upper() != "MIN" 6606 return self.expression( 6607 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6608 ) 6609 6610 return this 6611 6612 def _parse_window( 6613 self, this: t.Optional[exp.Expression], alias: bool = False 6614 ) -> t.Optional[exp.Expression]: 6615 func = this 6616 comments = func.comments if isinstance(func, exp.Expression) else None 6617 6618 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6619 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6620 if self._match_text_seq("WITHIN", "GROUP"): 6621 order = self._parse_wrapped(self._parse_order) 6622 this = self.expression(exp.WithinGroup, this=this, expression=order) 6623 6624 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6625 self._match(TokenType.WHERE) 6626 this = self.expression( 6627 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6628 ) 6629 self._match_r_paren() 6630 6631 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6632 # Some dialects choose to implement and some do not. 6633 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6634 6635 # There is some code above in _parse_lambda that handles 6636 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6637 6638 # The below changes handle 6639 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6640 6641 # Oracle allows both formats 6642 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6643 # and Snowflake chose to do the same for familiarity 6644 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6645 if isinstance(this, exp.AggFunc): 6646 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6647 6648 if ignore_respect and ignore_respect is not this: 6649 ignore_respect.replace(ignore_respect.this) 6650 this = self.expression(ignore_respect.__class__, this=this) 6651 6652 this = self._parse_respect_or_ignore_nulls(this) 6653 6654 # bigquery select from window x AS (partition by ...) 
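# With alias=True (the _parse_named_window entry point), `this` is the window's name,
# e.g. the `w` in SELECT ... FROM t WINDOW w AS (PARTITION BY a ORDER BY b), and the
# parenthesized spec that follows is parsed just like an inline OVER (...) clause.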
6655 if alias: 6656 over = None 6657 self._match(TokenType.ALIAS) 6658 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6659 return this 6660 else: 6661 over = self._prev.text.upper() 6662 6663 if comments and isinstance(func, exp.Expression): 6664 func.pop_comments() 6665 6666 if not self._match(TokenType.L_PAREN): 6667 return self.expression( 6668 exp.Window, 6669 comments=comments, 6670 this=this, 6671 alias=self._parse_id_var(False), 6672 over=over, 6673 ) 6674 6675 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6676 6677 first = self._match(TokenType.FIRST) 6678 if self._match_text_seq("LAST"): 6679 first = False 6680 6681 partition, order = self._parse_partition_and_order() 6682 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6683 6684 if kind: 6685 self._match(TokenType.BETWEEN) 6686 start = self._parse_window_spec() 6687 self._match(TokenType.AND) 6688 end = self._parse_window_spec() 6689 6690 spec = self.expression( 6691 exp.WindowSpec, 6692 kind=kind, 6693 start=start["value"], 6694 start_side=start["side"], 6695 end=end["value"], 6696 end_side=end["side"], 6697 ) 6698 else: 6699 spec = None 6700 6701 self._match_r_paren() 6702 6703 window = self.expression( 6704 exp.Window, 6705 comments=comments, 6706 this=this, 6707 partition_by=partition, 6708 order=order, 6709 spec=spec, 6710 alias=window_alias, 6711 over=over, 6712 first=first, 6713 ) 6714 6715 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6716 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6717 return self._parse_window(window, alias=alias) 6718 6719 return window 6720 6721 def _parse_partition_and_order( 6722 self, 6723 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6724 return self._parse_partition_by(), self._parse_order() 6725 6726 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6727 self._match(TokenType.BETWEEN) 6728 6729 return { 6730 "value": ( 6731 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6732 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6733 or self._parse_bitwise() 6734 ), 6735 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6736 } 6737 6738 def _parse_alias( 6739 self, this: t.Optional[exp.Expression], explicit: bool = False 6740 ) -> t.Optional[exp.Expression]: 6741 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6742 # so this section tries to parse the clause version and if it fails, it treats the token 6743 # as an identifier (alias) 6744 if self._can_parse_limit_or_offset(): 6745 return this 6746 6747 any_token = self._match(TokenType.ALIAS) 6748 comments = self._prev_comments or [] 6749 6750 if explicit and not any_token: 6751 return this 6752 6753 if self._match(TokenType.L_PAREN): 6754 aliases = self.expression( 6755 exp.Aliases, 6756 comments=comments, 6757 this=this, 6758 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6759 ) 6760 self._match_r_paren(aliases) 6761 return aliases 6762 6763 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6764 self.STRING_ALIASES and self._parse_string_as_identifier() 6765 ) 6766 6767 if alias: 6768 comments.extend(alias.pop_comments()) 6769 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6770 column = this.this 6771 6772 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6773 if not this.comments and column and 
column.comments: 6774 this.comments = column.pop_comments() 6775 6776 return this 6777 6778 def _parse_id_var( 6779 self, 6780 any_token: bool = True, 6781 tokens: t.Optional[t.Collection[TokenType]] = None, 6782 ) -> t.Optional[exp.Expression]: 6783 expression = self._parse_identifier() 6784 if not expression and ( 6785 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6786 ): 6787 quoted = self._prev.token_type == TokenType.STRING 6788 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6789 6790 return expression 6791 6792 def _parse_string(self) -> t.Optional[exp.Expression]: 6793 if self._match_set(self.STRING_PARSERS): 6794 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6795 return self._parse_placeholder() 6796 6797 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6798 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6799 6800 def _parse_number(self) -> t.Optional[exp.Expression]: 6801 if self._match_set(self.NUMERIC_PARSERS): 6802 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6803 return self._parse_placeholder() 6804 6805 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6806 if self._match(TokenType.IDENTIFIER): 6807 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6808 return self._parse_placeholder() 6809 6810 def _parse_var( 6811 self, 6812 any_token: bool = False, 6813 tokens: t.Optional[t.Collection[TokenType]] = None, 6814 upper: bool = False, 6815 ) -> t.Optional[exp.Expression]: 6816 if ( 6817 (any_token and self._advance_any()) 6818 or self._match(TokenType.VAR) 6819 or (self._match_set(tokens) if tokens else False) 6820 ): 6821 return self.expression( 6822 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6823 ) 6824 return self._parse_placeholder() 6825 6826 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6827 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6828 self._advance() 6829 return self._prev 6830 return None 6831 6832 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6833 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6834 6835 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6836 return self._parse_primary() or self._parse_var(any_token=True) 6837 6838 def _parse_null(self) -> t.Optional[exp.Expression]: 6839 if self._match_set(self.NULL_TOKENS): 6840 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6841 return self._parse_placeholder() 6842 6843 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6844 if self._match(TokenType.TRUE): 6845 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6846 if self._match(TokenType.FALSE): 6847 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6848 return self._parse_placeholder() 6849 6850 def _parse_star(self) -> t.Optional[exp.Expression]: 6851 if self._match(TokenType.STAR): 6852 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6853 return self._parse_placeholder() 6854 6855 def _parse_parameter(self) -> exp.Parameter: 6856 this = self._parse_identifier() or self._parse_primary_or_var() 6857 return self.expression(exp.Parameter, this=this) 6858 6859 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6860 if self._match_set(self.PLACEHOLDER_PARSERS): 6861 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6862 if placeholder: 6863 return placeholder 6864 self._advance(-1) 6865 return None 6866 6867 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6868 if not self._match_texts(keywords): 6869 return None 6870 if self._match(TokenType.L_PAREN, advance=False): 6871 return self._parse_wrapped_csv(self._parse_expression) 6872 6873 expression = self._parse_expression() 6874 return [expression] if expression else None 6875 6876 def _parse_csv( 6877 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6878 ) -> t.List[exp.Expression]: 6879 parse_result = parse_method() 6880 items = [parse_result] if parse_result is not None else [] 6881 6882 while self._match(sep): 6883 self._add_comments(parse_result) 6884 parse_result = parse_method() 6885 if parse_result is not None: 6886 items.append(parse_result) 6887 6888 return items 6889 6890 def _parse_tokens( 6891 self, parse_method: t.Callable, expressions: t.Dict 6892 ) -> t.Optional[exp.Expression]: 6893 this = parse_method() 6894 6895 while self._match_set(expressions): 6896 this = self.expression( 6897 expressions[self._prev.token_type], 6898 this=this, 6899 comments=self._prev_comments, 6900 expression=parse_method(), 6901 ) 6902 6903 return this 6904 6905 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6906 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6907 6908 def _parse_wrapped_csv( 6909 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6910 ) -> t.List[exp.Expression]: 6911 return self._parse_wrapped( 6912 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6913 ) 6914 6915 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6916 wrapped = self._match(TokenType.L_PAREN) 6917 if not wrapped and not optional: 6918 self.raise_error("Expecting (") 6919 parse_result = parse_method() 6920 if wrapped: 6921 self._match_r_paren() 6922 return parse_result 6923 6924 def _parse_expressions(self) -> t.List[exp.Expression]: 6925 return self._parse_csv(self._parse_expression) 6926 6927 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6928 return self._parse_select() or self._parse_set_operations( 6929 self._parse_alias(self._parse_assignment(), explicit=True) 6930 if alias 6931 else self._parse_assignment() 6932 ) 6933 6934 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6935 return self._parse_query_modifiers( 6936 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6937 ) 6938 6939 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6940 this = None 6941 if self._match_texts(self.TRANSACTION_KIND): 6942 this = self._prev.text 6943 6944 self._match_texts(("TRANSACTION", "WORK")) 6945 6946 modes = [] 6947 while True: 6948 mode = [] 6949 while self._match(TokenType.VAR): 6950 mode.append(self._prev.text) 6951 6952 if mode: 6953 modes.append(" ".join(mode)) 6954 if not self._match(TokenType.COMMA): 6955 break 6956 6957 return self.expression(exp.Transaction, this=this, modes=modes) 6958 6959 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6960 chain = None 6961 savepoint = None 6962 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6963 6964 self._match_texts(("TRANSACTION", "WORK")) 6965 6966 if self._match_text_seq("TO"): 6967 self._match_text_seq("SAVEPOINT") 6968 savepoint = self._parse_id_var() 
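# Covers the standard COMMIT/ROLLBACK [WORK] [AND [NO] CHAIN] tail: `chain` becomes
# True for AND CHAIN, False for AND NO CHAIN, and stays None when the clause is absent.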
6969 6970 if self._match(TokenType.AND): 6971 chain = not self._match_text_seq("NO") 6972 self._match_text_seq("CHAIN") 6973 6974 if is_rollback: 6975 return self.expression(exp.Rollback, savepoint=savepoint) 6976 6977 return self.expression(exp.Commit, chain=chain) 6978 6979 def _parse_refresh(self) -> exp.Refresh: 6980 self._match(TokenType.TABLE) 6981 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6982 6983 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6984 if not self._match_text_seq("ADD"): 6985 return None 6986 6987 self._match(TokenType.COLUMN) 6988 exists_column = self._parse_exists(not_=True) 6989 expression = self._parse_field_def() 6990 6991 if expression: 6992 expression.set("exists", exists_column) 6993 6994 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6995 if self._match_texts(("FIRST", "AFTER")): 6996 position = self._prev.text 6997 column_position = self.expression( 6998 exp.ColumnPosition, this=self._parse_column(), position=position 6999 ) 7000 expression.set("position", column_position) 7001 7002 return expression 7003 7004 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7005 drop = self._match(TokenType.DROP) and self._parse_drop() 7006 if drop and not isinstance(drop, exp.Command): 7007 drop.set("kind", drop.args.get("kind", "COLUMN")) 7008 return drop 7009 7010 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7011 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7012 return self.expression( 7013 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7014 ) 7015 7016 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7017 index = self._index - 1 7018 7019 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7020 return self._parse_csv( 7021 lambda: self.expression( 7022 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7023 ) 7024 ) 7025 7026 self._retreat(index) 7027 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7028 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7029 7030 if self._match_text_seq("ADD", "COLUMNS"): 7031 schema = self._parse_schema() 7032 if schema: 7033 return [schema] 7034 return [] 7035 7036 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7037 7038 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7039 if self._match_texts(self.ALTER_ALTER_PARSERS): 7040 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7041 7042 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7043 # keyword after ALTER we default to parsing this statement 7044 self._match(TokenType.COLUMN) 7045 column = self._parse_field(any_token=True) 7046 7047 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7048 return self.expression(exp.AlterColumn, this=column, drop=True) 7049 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7050 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7051 if self._match(TokenType.COMMENT): 7052 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7053 if self._match_text_seq("DROP", "NOT", "NULL"): 7054 return self.expression( 7055 exp.AlterColumn, 7056 this=column, 7057 drop=True, 7058 allow_null=True, 7059 ) 7060 if self._match_text_seq("SET", "NOT", "NULL"): 7061 return self.expression( 7062 
exp.AlterColumn, 7063 this=column, 7064 allow_null=False, 7065 ) 7066 self._match_text_seq("SET", "DATA") 7067 self._match_text_seq("TYPE") 7068 return self.expression( 7069 exp.AlterColumn, 7070 this=column, 7071 dtype=self._parse_types(), 7072 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7073 using=self._match(TokenType.USING) and self._parse_assignment(), 7074 ) 7075 7076 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7077 if self._match_texts(("ALL", "EVEN", "AUTO")): 7078 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7079 7080 self._match_text_seq("KEY", "DISTKEY") 7081 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7082 7083 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7084 if compound: 7085 self._match_text_seq("SORTKEY") 7086 7087 if self._match(TokenType.L_PAREN, advance=False): 7088 return self.expression( 7089 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7090 ) 7091 7092 self._match_texts(("AUTO", "NONE")) 7093 return self.expression( 7094 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7095 ) 7096 7097 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7098 index = self._index - 1 7099 7100 partition_exists = self._parse_exists() 7101 if self._match(TokenType.PARTITION, advance=False): 7102 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7103 7104 self._retreat(index) 7105 return self._parse_csv(self._parse_drop_column) 7106 7107 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7108 if self._match(TokenType.COLUMN): 7109 exists = self._parse_exists() 7110 old_column = self._parse_column() 7111 to = self._match_text_seq("TO") 7112 new_column = self._parse_column() 7113 7114 if old_column is None or to is None or new_column is None: 7115 return None 7116 7117 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7118 7119 self._match_text_seq("TO") 7120 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7121 7122 def _parse_alter_table_set(self) -> exp.AlterSet: 7123 alter_set = self.expression(exp.AlterSet) 7124 7125 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7126 "TABLE", "PROPERTIES" 7127 ): 7128 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7129 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7130 alter_set.set("expressions", [self._parse_assignment()]) 7131 elif self._match_texts(("LOGGED", "UNLOGGED")): 7132 alter_set.set("option", exp.var(self._prev.text.upper())) 7133 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7134 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7135 elif self._match_text_seq("LOCATION"): 7136 alter_set.set("location", self._parse_field()) 7137 elif self._match_text_seq("ACCESS", "METHOD"): 7138 alter_set.set("access_method", self._parse_field()) 7139 elif self._match_text_seq("TABLESPACE"): 7140 alter_set.set("tablespace", self._parse_field()) 7141 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7142 alter_set.set("file_format", [self._parse_field()]) 7143 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7144 alter_set.set("file_format", self._parse_wrapped_options()) 7145 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7146 alter_set.set("copy_options", 
self._parse_wrapped_options()) 7147 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7148 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7149 else: 7150 if self._match_text_seq("SERDE"): 7151 alter_set.set("serde", self._parse_field()) 7152 7153 alter_set.set("expressions", [self._parse_properties()]) 7154 7155 return alter_set 7156 7157 def _parse_alter(self) -> exp.Alter | exp.Command: 7158 start = self._prev 7159 7160 alter_token = self._match_set(self.ALTERABLES) and self._prev 7161 if not alter_token: 7162 return self._parse_as_command(start) 7163 7164 exists = self._parse_exists() 7165 only = self._match_text_seq("ONLY") 7166 this = self._parse_table(schema=True) 7167 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7168 7169 if self._next: 7170 self._advance() 7171 7172 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7173 if parser: 7174 actions = ensure_list(parser(self)) 7175 not_valid = self._match_text_seq("NOT", "VALID") 7176 options = self._parse_csv(self._parse_property) 7177 7178 if not self._curr and actions: 7179 return self.expression( 7180 exp.Alter, 7181 this=this, 7182 kind=alter_token.text.upper(), 7183 exists=exists, 7184 actions=actions, 7185 only=only, 7186 options=options, 7187 cluster=cluster, 7188 not_valid=not_valid, 7189 ) 7190 7191 return self._parse_as_command(start) 7192 7193 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7194 start = self._prev 7195 # https://duckdb.org/docs/sql/statements/analyze 7196 if not self._curr: 7197 return self.expression(exp.Analyze) 7198 7199 options = [] 7200 while self._match_texts(self.ANALYZE_STYLES): 7201 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7202 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7203 else: 7204 options.append(self._prev.text.upper()) 7205 7206 this: t.Optional[exp.Expression] = None 7207 inner_expression: t.Optional[exp.Expression] = None 7208 7209 kind = self._curr and self._curr.text.upper() 7210 7211 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7212 this = self._parse_table_parts() 7213 elif self._match_text_seq("TABLES"): 7214 if self._match_set((TokenType.FROM, TokenType.IN)): 7215 kind = f"{kind} {self._prev.text.upper()}" 7216 this = self._parse_table(schema=True, is_db_reference=True) 7217 elif self._match_text_seq("DATABASE"): 7218 this = self._parse_table(schema=True, is_db_reference=True) 7219 elif self._match_text_seq("CLUSTER"): 7220 this = self._parse_table() 7221 # Try matching inner expr keywords before fallback to parse table. 
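# e.g. when the next token is an ANALYZE_EXPRESSION_PARSERS keyword (such as
# VALIDATE or LIST, handled by the sub-parsers further below), dispatch to
# that sub-parser rather than treating the keyword as a table name.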
7222 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7223 kind = None 7224 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7225 else: 7226 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7227 kind = None 7228 this = self._parse_table_parts() 7229 7230 partition = self._try_parse(self._parse_partition) 7231 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7232 return self._parse_as_command(start) 7233 7234 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7235 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7236 "WITH", "ASYNC", "MODE" 7237 ): 7238 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7239 else: 7240 mode = None 7241 7242 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7243 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7244 7245 properties = self._parse_properties() 7246 return self.expression( 7247 exp.Analyze, 7248 kind=kind, 7249 this=this, 7250 mode=mode, 7251 partition=partition, 7252 properties=properties, 7253 expression=inner_expression, 7254 options=options, 7255 ) 7256 7257 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7258 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7259 this = None 7260 kind = self._prev.text.upper() 7261 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7262 expressions = [] 7263 7264 if not self._match_text_seq("STATISTICS"): 7265 self.raise_error("Expecting token STATISTICS") 7266 7267 if self._match_text_seq("NOSCAN"): 7268 this = "NOSCAN" 7269 elif self._match(TokenType.FOR): 7270 if self._match_text_seq("ALL", "COLUMNS"): 7271 this = "FOR ALL COLUMNS" 7272 if self._match_texts("COLUMNS"): 7273 this = "FOR COLUMNS" 7274 expressions = self._parse_csv(self._parse_column_reference) 7275 elif self._match_text_seq("SAMPLE"): 7276 sample = self._parse_number() 7277 expressions = [ 7278 self.expression( 7279 exp.AnalyzeSample, 7280 sample=sample, 7281 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7282 ) 7283 ] 7284 7285 return self.expression( 7286 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7287 ) 7288 7289 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7290 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7291 kind = None 7292 this = None 7293 expression: t.Optional[exp.Expression] = None 7294 if self._match_text_seq("REF", "UPDATE"): 7295 kind = "REF" 7296 this = "UPDATE" 7297 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7298 this = "UPDATE SET DANGLING TO NULL" 7299 elif self._match_text_seq("STRUCTURE"): 7300 kind = "STRUCTURE" 7301 if self._match_text_seq("CASCADE", "FAST"): 7302 this = "CASCADE FAST" 7303 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7304 ("ONLINE", "OFFLINE") 7305 ): 7306 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7307 expression = self._parse_into() 7308 7309 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7310 7311 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7312 this = self._prev.text.upper() 7313 if self._match_text_seq("COLUMNS"): 7314 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7315 return None 7316 7317 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7318 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7319 if self._match_text_seq("STATISTICS"): 7320 return self.expression(exp.AnalyzeDelete, kind=kind) 7321 return None 7322 7323 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7324 if self._match_text_seq("CHAINED", "ROWS"): 7325 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7326 return None 7327 7328 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7329 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7330 this = self._prev.text.upper() 7331 expression: t.Optional[exp.Expression] = None 7332 expressions = [] 7333 update_options = None 7334 7335 if self._match_text_seq("HISTOGRAM", "ON"): 7336 expressions = self._parse_csv(self._parse_column_reference) 7337 with_expressions = [] 7338 while self._match(TokenType.WITH): 7339 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7340 if self._match_texts(("SYNC", "ASYNC")): 7341 if self._match_text_seq("MODE", advance=False): 7342 with_expressions.append(f"{self._prev.text.upper()} MODE") 7343 self._advance() 7344 else: 7345 buckets = self._parse_number() 7346 if self._match_text_seq("BUCKETS"): 7347 with_expressions.append(f"{buckets} BUCKETS") 7348 if with_expressions: 7349 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7350 7351 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7352 TokenType.UPDATE, advance=False 7353 ): 7354 update_options = self._prev.text.upper() 7355 self._advance() 7356 elif self._match_text_seq("USING", "DATA"): 7357 expression = self.expression(exp.UsingData, this=self._parse_string()) 7358 7359 return self.expression( 7360 exp.AnalyzeHistogram, 7361 this=this, 7362 expressions=expressions, 7363 expression=expression, 7364 update_options=update_options, 7365 ) 7366 7367 def _parse_merge(self) -> exp.Merge: 7368 self._match(TokenType.INTO) 7369 target = self._parse_table() 7370 7371 if target and self._match(TokenType.ALIAS, advance=False): 7372 target.set("alias", self._parse_table_alias()) 7373 7374 self._match(TokenType.USING) 7375 using = self._parse_table() 7376 7377 self._match(TokenType.ON) 7378 on = self._parse_assignment() 7379 7380 return self.expression( 7381 exp.Merge, 7382 this=target, 7383 using=using, 7384 on=on, 7385 whens=self._parse_when_matched(), 7386 returning=self._parse_returning(), 7387 ) 7388 7389 def _parse_when_matched(self) -> exp.Whens: 7390 whens = [] 7391 7392 while self._match(TokenType.WHEN): 7393 matched = not self._match(TokenType.NOT) 7394 self._match_text_seq("MATCHED") 7395 source = ( 7396 False 7397 if self._match_text_seq("BY", "TARGET") 7398 else self._match_text_seq("BY", "SOURCE") 7399 ) 7400 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7401 7402 self._match(TokenType.THEN) 7403 7404 if self._match(TokenType.INSERT): 7405 this = self._parse_star() 7406 if this: 7407 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7408 else: 7409 then = self.expression( 7410 exp.Insert, 7411 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7412 expression=self._match_text_seq("VALUES") and self._parse_value(), 7413 ) 7414 elif self._match(TokenType.UPDATE): 7415 expressions = self._parse_star() 7416 if expressions: 7417 then = self.expression(exp.Update, expressions=expressions) 7418 else: 7419 then = self.expression( 7420 exp.Update, 7421 expressions=self._match(TokenType.SET) 7422 and 
self._parse_csv(self._parse_equality), 7423 ) 7424 elif self._match(TokenType.DELETE): 7425 then = self.expression(exp.Var, this=self._prev.text) 7426 else: 7427 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7428 7429 whens.append( 7430 self.expression( 7431 exp.When, 7432 matched=matched, 7433 source=source, 7434 condition=condition, 7435 then=then, 7436 ) 7437 ) 7438 return self.expression(exp.Whens, expressions=whens) 7439 7440 def _parse_show(self) -> t.Optional[exp.Expression]: 7441 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7442 if parser: 7443 return parser(self) 7444 return self._parse_as_command(self._prev) 7445 7446 def _parse_set_item_assignment( 7447 self, kind: t.Optional[str] = None 7448 ) -> t.Optional[exp.Expression]: 7449 index = self._index 7450 7451 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7452 return self._parse_set_transaction(global_=kind == "GLOBAL") 7453 7454 left = self._parse_primary() or self._parse_column() 7455 assignment_delimiter = self._match_texts(("=", "TO")) 7456 7457 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7458 self._retreat(index) 7459 return None 7460 7461 right = self._parse_statement() or self._parse_id_var() 7462 if isinstance(right, (exp.Column, exp.Identifier)): 7463 right = exp.var(right.name) 7464 7465 this = self.expression(exp.EQ, this=left, expression=right) 7466 return self.expression(exp.SetItem, this=this, kind=kind) 7467 7468 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7469 self._match_text_seq("TRANSACTION") 7470 characteristics = self._parse_csv( 7471 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7472 ) 7473 return self.expression( 7474 exp.SetItem, 7475 expressions=characteristics, 7476 kind="TRANSACTION", 7477 **{"global": global_}, # type: ignore 7478 ) 7479 7480 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7481 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7482 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7483 7484 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7485 index = self._index 7486 set_ = self.expression( 7487 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7488 ) 7489 7490 if self._curr: 7491 self._retreat(index) 7492 return self._parse_as_command(self._prev) 7493 7494 return set_ 7495 7496 def _parse_var_from_options( 7497 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7498 ) -> t.Optional[exp.Var]: 7499 start = self._curr 7500 if not start: 7501 return None 7502 7503 option = start.text.upper() 7504 continuations = options.get(option) 7505 7506 index = self._index 7507 self._advance() 7508 for keywords in continuations or []: 7509 if isinstance(keywords, str): 7510 keywords = (keywords,) 7511 7512 if self._match_text_seq(*keywords): 7513 option = f"{option} {' '.join(keywords)}" 7514 break 7515 else: 7516 if continuations or continuations is None: 7517 if raise_unmatched: 7518 self.raise_error(f"Unknown option {option}") 7519 7520 self._retreat(index) 7521 return None 7522 7523 return exp.var(option) 7524 7525 def _parse_as_command(self, start: Token) -> exp.Command: 7526 while self._curr: 7527 self._advance() 7528 text = self._find_sql(start, self._prev) 7529 size = len(start.text) 7530 self._warn_unsupported() 7531 return exp.Command(this=text[:size], expression=text[size:]) 7532 7533 def _parse_dict_property(self, 
this: str) -> exp.DictProperty: 7534 settings = [] 7535 7536 self._match_l_paren() 7537 kind = self._parse_id_var() 7538 7539 if self._match(TokenType.L_PAREN): 7540 while True: 7541 key = self._parse_id_var() 7542 value = self._parse_primary() 7543 if not key and value is None: 7544 break 7545 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7546 self._match(TokenType.R_PAREN) 7547 7548 self._match_r_paren() 7549 7550 return self.expression( 7551 exp.DictProperty, 7552 this=this, 7553 kind=kind.this if kind else None, 7554 settings=settings, 7555 ) 7556 7557 def _parse_dict_range(self, this: str) -> exp.DictRange: 7558 self._match_l_paren() 7559 has_min = self._match_text_seq("MIN") 7560 if has_min: 7561 min = self._parse_var() or self._parse_primary() 7562 self._match_text_seq("MAX") 7563 max = self._parse_var() or self._parse_primary() 7564 else: 7565 max = self._parse_var() or self._parse_primary() 7566 min = exp.Literal.number(0) 7567 self._match_r_paren() 7568 return self.expression(exp.DictRange, this=this, min=min, max=max) 7569 7570 def _parse_comprehension( 7571 self, this: t.Optional[exp.Expression] 7572 ) -> t.Optional[exp.Comprehension]: 7573 index = self._index 7574 expression = self._parse_column() 7575 if not self._match(TokenType.IN): 7576 self._retreat(index - 1) 7577 return None 7578 iterator = self._parse_column() 7579 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7580 return self.expression( 7581 exp.Comprehension, 7582 this=this, 7583 expression=expression, 7584 iterator=iterator, 7585 condition=condition, 7586 ) 7587 7588 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7589 if self._match(TokenType.HEREDOC_STRING): 7590 return self.expression(exp.Heredoc, this=self._prev.text) 7591 7592 if not self._match_text_seq("$"): 7593 return None 7594 7595 tags = ["$"] 7596 tag_text = None 7597 7598 if self._is_connected(): 7599 self._advance() 7600 tags.append(self._prev.text.upper()) 7601 else: 7602 self.raise_error("No closing $ found") 7603 7604 if tags[-1] != "$": 7605 if self._is_connected() and self._match_text_seq("$"): 7606 tag_text = tags[-1] 7607 tags.append("$") 7608 else: 7609 self.raise_error("No closing $ found") 7610 7611 heredoc_start = self._curr 7612 7613 while self._curr: 7614 if self._match_text_seq(*tags, advance=False): 7615 this = self._find_sql(heredoc_start, self._prev) 7616 self._advance(len(tags)) 7617 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7618 7619 self._advance() 7620 7621 self.raise_error(f"No closing {''.join(tags)} found") 7622 return None 7623 7624 def _find_parser( 7625 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7626 ) -> t.Optional[t.Callable]: 7627 if not self._curr: 7628 return None 7629 7630 index = self._index 7631 this = [] 7632 while True: 7633 # The current token might be multiple words 7634 curr = self._curr.text.upper() 7635 key = curr.split(" ") 7636 this.append(curr) 7637 7638 self._advance() 7639 result, trie = in_trie(trie, key) 7640 if result == TrieResult.FAILED: 7641 break 7642 7643 if result == TrieResult.EXISTS: 7644 subparser = parsers[" ".join(this)] 7645 return subparser 7646 7647 self._retreat(index) 7648 return None 7649 7650 def _match(self, token_type, advance=True, expression=None): 7651 if not self._curr: 7652 return None 7653 7654 if self._curr.token_type == token_type: 7655 if advance: 7656 self._advance() 7657 self._add_comments(expression) 7658 return True 7659 7660 return None 7661 7662 def _match_set(self, types, 
advance=True): 7663 if not self._curr: 7664 return None 7665 7666 if self._curr.token_type in types: 7667 if advance: 7668 self._advance() 7669 return True 7670 7671 return None 7672 7673 def _match_pair(self, token_type_a, token_type_b, advance=True): 7674 if not self._curr or not self._next: 7675 return None 7676 7677 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7678 if advance: 7679 self._advance(2) 7680 return True 7681 7682 return None 7683 7684 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7685 if not self._match(TokenType.L_PAREN, expression=expression): 7686 self.raise_error("Expecting (") 7687 7688 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7689 if not self._match(TokenType.R_PAREN, expression=expression): 7690 self.raise_error("Expecting )") 7691 7692 def _match_texts(self, texts, advance=True): 7693 if ( 7694 self._curr 7695 and self._curr.token_type != TokenType.STRING 7696 and self._curr.text.upper() in texts 7697 ): 7698 if advance: 7699 self._advance() 7700 return True 7701 return None 7702 7703 def _match_text_seq(self, *texts, advance=True): 7704 index = self._index 7705 for text in texts: 7706 if ( 7707 self._curr 7708 and self._curr.token_type != TokenType.STRING 7709 and self._curr.text.upper() == text 7710 ): 7711 self._advance() 7712 else: 7713 self._retreat(index) 7714 return None 7715 7716 if not advance: 7717 self._retreat(index) 7718 7719 return True 7720 7721 def _replace_lambda( 7722 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7723 ) -> t.Optional[exp.Expression]: 7724 if not node: 7725 return node 7726 7727 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7728 7729 for column in node.find_all(exp.Column): 7730 typ = lambda_types.get(column.parts[0].name) 7731 if typ is not None: 7732 dot_or_id = column.to_dot() if column.table else column.this 7733 7734 if typ: 7735 dot_or_id = self.expression( 7736 exp.Cast, 7737 this=dot_or_id, 7738 to=typ, 7739 ) 7740 7741 parent = column.parent 7742 7743 while isinstance(parent, exp.Dot): 7744 if not isinstance(parent.parent, exp.Dot): 7745 parent.replace(dot_or_id) 7746 break 7747 parent = parent.parent 7748 else: 7749 if column is node: 7750 node = dot_or_id 7751 else: 7752 column.replace(dot_or_id) 7753 return node 7754 7755 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7756 start = self._prev 7757 7758 # Not to be confused with TRUNCATE(number, decimals) function call 7759 if self._match(TokenType.L_PAREN): 7760 self._retreat(self._index - 2) 7761 return self._parse_function() 7762 7763 # Clickhouse supports TRUNCATE DATABASE as well 7764 is_database = self._match(TokenType.DATABASE) 7765 7766 self._match(TokenType.TABLE) 7767 7768 exists = self._parse_exists(not_=False) 7769 7770 expressions = self._parse_csv( 7771 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7772 ) 7773 7774 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7775 7776 if self._match_text_seq("RESTART", "IDENTITY"): 7777 identity = "RESTART" 7778 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7779 identity = "CONTINUE" 7780 else: 7781 identity = None 7782 7783 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7784 option = self._prev.text 7785 else: 7786 option = None 7787 7788 partition = self._parse_partition() 7789 7790 # Fallback case 7791 if self._curr: 7792 return 
self._parse_as_command(start) 7793 7794 return self.expression( 7795 exp.TruncateTable, 7796 expressions=expressions, 7797 is_database=is_database, 7798 exists=exists, 7799 cluster=cluster, 7800 identity=identity, 7801 option=option, 7802 partition=partition, 7803 ) 7804 7805 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7806 this = self._parse_ordered(self._parse_opclass) 7807 7808 if not self._match(TokenType.WITH): 7809 return this 7810 7811 op = self._parse_var(any_token=True) 7812 7813 return self.expression(exp.WithOperator, this=this, op=op) 7814 7815 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7816 self._match(TokenType.EQ) 7817 self._match(TokenType.L_PAREN) 7818 7819 opts: t.List[t.Optional[exp.Expression]] = [] 7820 while self._curr and not self._match(TokenType.R_PAREN): 7821 if self._match_text_seq("FORMAT_NAME", "="): 7822 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7823 # so we parse it separately to use _parse_field() 7824 prop = self.expression( 7825 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7826 ) 7827 opts.append(prop) 7828 else: 7829 opts.append(self._parse_property()) 7830 7831 self._match(TokenType.COMMA) 7832 7833 return opts 7834 7835 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7836 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7837 7838 options = [] 7839 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7840 option = self._parse_var(any_token=True) 7841 prev = self._prev.text.upper() 7842 7843 # Different dialects might separate options and values by white space, "=" and "AS" 7844 self._match(TokenType.EQ) 7845 self._match(TokenType.ALIAS) 7846 7847 param = self.expression(exp.CopyParameter, this=option) 7848 7849 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7850 TokenType.L_PAREN, advance=False 7851 ): 7852 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7853 param.set("expressions", self._parse_wrapped_options()) 7854 elif prev == "FILE_FORMAT": 7855 # T-SQL's external file format case 7856 param.set("expression", self._parse_field()) 7857 else: 7858 param.set("expression", self._parse_unquoted_field()) 7859 7860 options.append(param) 7861 self._match(sep) 7862 7863 return options 7864 7865 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7866 expr = self.expression(exp.Credentials) 7867 7868 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7869 expr.set("storage", self._parse_field()) 7870 if self._match_text_seq("CREDENTIALS"): 7871 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7872 creds = ( 7873 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7874 ) 7875 expr.set("credentials", creds) 7876 if self._match_text_seq("ENCRYPTION"): 7877 expr.set("encryption", self._parse_wrapped_options()) 7878 if self._match_text_seq("IAM_ROLE"): 7879 expr.set("iam_role", self._parse_field()) 7880 if self._match_text_seq("REGION"): 7881 expr.set("region", self._parse_field()) 7882 7883 return expr 7884 7885 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7886 return self._parse_field() 7887 7888 def _parse_copy(self) -> exp.Copy | exp.Command: 7889 start = self._prev 7890 7891 self._match(TokenType.INTO) 7892 7893 this = ( 7894 self._parse_select(nested=True, parse_subquery_alias=False) 7895 if self._match(TokenType.L_PAREN, advance=False) 7896 else self._parse_table(schema=True) 7897 ) 7898 7899 
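# Direction flag: True for COPY ... FROM (loading data), False for
# COPY ... TO (unloading); if neither keyword is present, FROM is assumed.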
kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7900 7901 files = self._parse_csv(self._parse_file_location) 7902 credentials = self._parse_credentials() 7903 7904 self._match_text_seq("WITH") 7905 7906 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7907 7908 # Fallback case 7909 if self._curr: 7910 return self._parse_as_command(start) 7911 7912 return self.expression( 7913 exp.Copy, 7914 this=this, 7915 kind=kind, 7916 credentials=credentials, 7917 files=files, 7918 params=params, 7919 ) 7920 7921 def _parse_normalize(self) -> exp.Normalize: 7922 return self.expression( 7923 exp.Normalize, 7924 this=self._parse_bitwise(), 7925 form=self._match(TokenType.COMMA) and self._parse_var(), 7926 ) 7927 7928 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 7929 args = self._parse_csv(lambda: self._parse_lambda()) 7930 7931 this = seq_get(args, 0) 7932 decimals = seq_get(args, 1) 7933 7934 return expr_type( 7935 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 7936 ) 7937 7938 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7939 if self._match_text_seq("COLUMNS", "(", advance=False): 7940 this = self._parse_function() 7941 if isinstance(this, exp.Columns): 7942 this.set("unpack", True) 7943 return this 7944 7945 return self.expression( 7946 exp.Star, 7947 **{ # type: ignore 7948 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7949 "replace": self._parse_star_op("REPLACE"), 7950 "rename": self._parse_star_op("RENAME"), 7951 }, 7952 ) 7953 7954 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7955 privilege_parts = [] 7956 7957 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7958 # (end of privilege list) or L_PAREN (start of column list) are met 7959 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7960 privilege_parts.append(self._curr.text.upper()) 7961 self._advance() 7962 7963 this = exp.var(" ".join(privilege_parts)) 7964 expressions = ( 7965 self._parse_wrapped_csv(self._parse_column) 7966 if self._match(TokenType.L_PAREN, advance=False) 7967 else None 7968 ) 7969 7970 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7971 7972 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7973 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7974 principal = self._parse_id_var() 7975 7976 if not principal: 7977 return None 7978 7979 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7980 7981 def _parse_grant(self) -> exp.Grant | exp.Command: 7982 start = self._prev 7983 7984 privileges = self._parse_csv(self._parse_grant_privilege) 7985 7986 self._match(TokenType.ON) 7987 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7988 7989 # Attempt to parse the securable e.g. 
MySQL allows names 7990 # such as "foo.*", "*.*" which are not easily parseable yet 7991 securable = self._try_parse(self._parse_table_parts) 7992 7993 if not securable or not self._match_text_seq("TO"): 7994 return self._parse_as_command(start) 7995 7996 principals = self._parse_csv(self._parse_grant_principal) 7997 7998 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7999 8000 if self._curr: 8001 return self._parse_as_command(start) 8002 8003 return self.expression( 8004 exp.Grant, 8005 privileges=privileges, 8006 kind=kind, 8007 securable=securable, 8008 principals=principals, 8009 grant_option=grant_option, 8010 ) 8011 8012 def _parse_overlay(self) -> exp.Overlay: 8013 return self.expression( 8014 exp.Overlay, 8015 **{ # type: ignore 8016 "this": self._parse_bitwise(), 8017 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8018 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8019 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8020 }, 8021 )
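As orientation for the statement parsers above, here is a minimal usage sketch (not part of the module source): parse_one is the usual high-level entry point, while Tokenizer and Parser are the pieces documented here.

from sqlglot import parse_one
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "TRUNCATE TABLE t RESTART IDENTITY CASCADE"

# High-level: tokenize, parse and return a single expression tree.
ast = parse_one(sql)

# Roughly equivalent low-level flow; Parser.parse returns one expression
# per semicolon-separated statement.
tokens = Tokenizer().tokenize(sql)
expressions = Parser().parse(tokens, sql)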
102def build_mod(args: t.List) -> exp.Mod: 103 this = seq_get(args, 0) 104 expression = seq_get(args, 1) 105 106 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 107 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 108 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 109 110 return exp.Mod(this=this, expression=expression)
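A quick check of the parenthesization described in the comment above (a sketch; the rendered output assumes the default dialect, which emits Mod as the % operator):

from sqlglot import parse_one

# The binary operand is wrapped in Paren by build_mod, so precedence
# survives rendering: expected output is SELECT (a + 1) % 7
print(parse_one("SELECT MOD(a + 1, 7)").sql())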
122def build_array_constructor( 123 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 124) -> exp.Expression: 125 array_exp = exp_class(expressions=args) 126 127 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 128 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 129 130 return array_exp
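A hedged sketch of calling this builder directly; in normal use the parser's bracket handling reaches it, and DuckDB below is just an arbitrary dialect instance chosen for illustration:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

dialect = Dialect.get_or_raise("duckdb")
args = [exp.Literal.number(1), exp.Literal.number(2)]

arr = build_array_constructor(exp.Array, args, TokenType.L_BRACKET, dialect)
# bracket_notation is only recorded when the dialect distinguishes
# ARRAY(...) from [...] constructors (HAS_DISTINCT_ARRAY_CONSTRUCTORS).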
133def build_convert_timezone( 134 args: t.List, default_source_tz: t.Optional[str] = None 135) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 136 if len(args) == 2: 137 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 138 return exp.ConvertTimezone( 139 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 140 ) 141 142 return exp.ConvertTimezone.from_arg_list(args)
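For the two-argument form, a small sketch (the "UTC" default below is an assumption chosen for illustration):

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

# Only target_tz and timestamp were supplied, so default_source_tz fills
# in the source timezone as a string literal.
node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("ts")],
    default_source_tz="UTC",
)
assert isinstance(node, exp.ConvertTimezone)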
175class Parser(metaclass=_Parser): 176 """ 177 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 178 179 Args: 180 error_level: The desired error level. 181 Default: ErrorLevel.IMMEDIATE 182 error_message_context: The amount of context to capture from a query string when displaying 183 the error message (in number of characters). 184 Default: 100 185 max_errors: Maximum number of error messages to include in a raised ParseError. 186 This is only relevant if error_level is ErrorLevel.RAISE. 187 Default: 3 188 """ 189 190 FUNCTIONS: t.Dict[str, t.Callable] = { 191 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 192 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 193 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 194 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 195 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 196 ), 197 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 198 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 199 ), 200 "CHAR": lambda args: exp.Chr(expressions=args), 201 "CHR": lambda args: exp.Chr(expressions=args), 202 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 203 "CONCAT": lambda args, dialect: exp.Concat( 204 expressions=args, 205 safe=not dialect.STRICT_STRING_CONCAT, 206 coalesce=dialect.CONCAT_COALESCE, 207 ), 208 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 209 expressions=args, 210 safe=not dialect.STRICT_STRING_CONCAT, 211 coalesce=dialect.CONCAT_COALESCE, 212 ), 213 "CONVERT_TIMEZONE": build_convert_timezone, 214 "DATE_TO_DATE_STR": lambda args: exp.Cast( 215 this=seq_get(args, 0), 216 to=exp.DataType(this=exp.DataType.Type.TEXT), 217 ), 218 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 219 start=seq_get(args, 0), 220 end=seq_get(args, 1), 221 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 222 ), 223 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 224 "HEX": build_hex, 225 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 226 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 227 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 228 "LIKE": build_like, 229 "LOG": build_logarithm, 230 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 231 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 232 "LOWER": build_lower, 233 "LPAD": lambda args: build_pad(args), 234 "LEFTPAD": lambda args: build_pad(args), 235 "LTRIM": lambda args: build_trim(args), 236 "MOD": build_mod, 237 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 238 "RPAD": lambda args: build_pad(args, is_left=False), 239 "RTRIM": lambda args: build_trim(args, is_left=False), 240 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 241 if len(args) != 2 242 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 243 "STRPOS": exp.StrPosition.from_arg_list, 244 "CHARINDEX": lambda args: build_locate_strposition(args), 245 "INSTR": exp.StrPosition.from_arg_list, 246 "LOCATE": lambda args: build_locate_strposition(args), 247 "TIME_TO_TIME_STR": lambda args: exp.Cast( 248 this=seq_get(args, 0), 249 to=exp.DataType(this=exp.DataType.Type.TEXT), 250 ), 251 "TO_HEX": build_hex, 252 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 253 this=exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 start=exp.Literal.number(1), 258 length=exp.Literal.number(10), 259 ), 260 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 261 "UPPER": build_upper, 262 "VAR_MAP": build_var_map, 263 } 264 265 NO_PAREN_FUNCTIONS = { 266 TokenType.CURRENT_DATE: exp.CurrentDate, 267 TokenType.CURRENT_DATETIME: exp.CurrentDate, 268 TokenType.CURRENT_TIME: exp.CurrentTime, 269 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 270 TokenType.CURRENT_USER: exp.CurrentUser, 271 } 272 273 STRUCT_TYPE_TOKENS = { 274 TokenType.NESTED, 275 TokenType.OBJECT, 276 TokenType.STRUCT, 277 TokenType.UNION, 278 } 279 280 NESTED_TYPE_TOKENS = { 281 TokenType.ARRAY, 282 TokenType.LIST, 283 TokenType.LOWCARDINALITY, 284 TokenType.MAP, 285 TokenType.NULLABLE, 286 TokenType.RANGE, 287 *STRUCT_TYPE_TOKENS, 288 } 289 290 ENUM_TYPE_TOKENS = { 291 TokenType.DYNAMIC, 292 TokenType.ENUM, 293 TokenType.ENUM8, 294 TokenType.ENUM16, 295 } 296 297 AGGREGATE_TYPE_TOKENS = { 298 TokenType.AGGREGATEFUNCTION, 299 TokenType.SIMPLEAGGREGATEFUNCTION, 300 } 301 302 TYPE_TOKENS = { 303 TokenType.BIT, 304 TokenType.BOOLEAN, 305 TokenType.TINYINT, 306 TokenType.UTINYINT, 307 TokenType.SMALLINT, 308 TokenType.USMALLINT, 309 TokenType.INT, 310 TokenType.UINT, 311 TokenType.BIGINT, 312 TokenType.UBIGINT, 313 TokenType.INT128, 314 TokenType.UINT128, 315 TokenType.INT256, 316 TokenType.UINT256, 317 TokenType.MEDIUMINT, 318 TokenType.UMEDIUMINT, 319 TokenType.FIXEDSTRING, 320 TokenType.FLOAT, 321 TokenType.DOUBLE, 322 TokenType.CHAR, 323 TokenType.NCHAR, 324 TokenType.VARCHAR, 325 TokenType.NVARCHAR, 326 TokenType.BPCHAR, 327 TokenType.TEXT, 328 TokenType.MEDIUMTEXT, 329 TokenType.LONGTEXT, 330 TokenType.MEDIUMBLOB, 331 TokenType.LONGBLOB, 332 TokenType.BINARY, 333 TokenType.VARBINARY, 334 TokenType.JSON, 335 TokenType.JSONB, 336 TokenType.INTERVAL, 337 TokenType.TINYBLOB, 338 TokenType.TINYTEXT, 339 TokenType.TIME, 340 TokenType.TIMETZ, 341 TokenType.TIMESTAMP, 342 TokenType.TIMESTAMP_S, 343 TokenType.TIMESTAMP_MS, 344 TokenType.TIMESTAMP_NS, 345 TokenType.TIMESTAMPTZ, 346 TokenType.TIMESTAMPLTZ, 347 TokenType.TIMESTAMPNTZ, 348 TokenType.DATETIME, 349 TokenType.DATETIME2, 350 TokenType.DATETIME64, 351 TokenType.SMALLDATETIME, 352 TokenType.DATE, 353 TokenType.DATE32, 354 TokenType.INT4RANGE, 355 TokenType.INT4MULTIRANGE, 356 TokenType.INT8RANGE, 357 TokenType.INT8MULTIRANGE, 358 TokenType.NUMRANGE, 359 TokenType.NUMMULTIRANGE, 360 TokenType.TSRANGE, 361 TokenType.TSMULTIRANGE, 362 TokenType.TSTZRANGE, 363 TokenType.TSTZMULTIRANGE, 364 TokenType.DATERANGE, 365 TokenType.DATEMULTIRANGE, 366 TokenType.DECIMAL, 367 TokenType.DECIMAL32, 368 TokenType.DECIMAL64, 369 TokenType.DECIMAL128, 370 TokenType.DECIMAL256, 371 TokenType.UDECIMAL, 372 TokenType.BIGDECIMAL, 373 TokenType.UUID, 374 TokenType.GEOGRAPHY, 375 TokenType.GEOMETRY, 376 TokenType.POINT, 377 TokenType.RING, 378 TokenType.LINESTRING, 379 TokenType.MULTILINESTRING, 380 TokenType.POLYGON, 381 TokenType.MULTIPOLYGON, 382 TokenType.HLLSKETCH, 383 TokenType.HSTORE, 384 TokenType.PSEUDO_TYPE, 385 TokenType.SUPER, 386 TokenType.SERIAL, 387 TokenType.SMALLSERIAL, 388 TokenType.BIGSERIAL, 389 TokenType.XML, 390 TokenType.YEAR, 391 TokenType.UNIQUEIDENTIFIER, 392 TokenType.USERDEFINED, 393 TokenType.MONEY, 394 TokenType.SMALLMONEY, 395 TokenType.ROWVERSION, 396 TokenType.IMAGE, 397 TokenType.VARIANT, 398 TokenType.VECTOR, 399 TokenType.OBJECT, 400 
TokenType.OBJECT_IDENTIFIER, 401 TokenType.INET, 402 TokenType.IPADDRESS, 403 TokenType.IPPREFIX, 404 TokenType.IPV4, 405 TokenType.IPV6, 406 TokenType.UNKNOWN, 407 TokenType.NULL, 408 TokenType.NAME, 409 TokenType.TDIGEST, 410 TokenType.DYNAMIC, 411 *ENUM_TYPE_TOKENS, 412 *NESTED_TYPE_TOKENS, 413 *AGGREGATE_TYPE_TOKENS, 414 } 415 416 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 417 TokenType.BIGINT: TokenType.UBIGINT, 418 TokenType.INT: TokenType.UINT, 419 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 420 TokenType.SMALLINT: TokenType.USMALLINT, 421 TokenType.TINYINT: TokenType.UTINYINT, 422 TokenType.DECIMAL: TokenType.UDECIMAL, 423 } 424 425 SUBQUERY_PREDICATES = { 426 TokenType.ANY: exp.Any, 427 TokenType.ALL: exp.All, 428 TokenType.EXISTS: exp.Exists, 429 TokenType.SOME: exp.Any, 430 } 431 432 RESERVED_TOKENS = { 433 *Tokenizer.SINGLE_TOKENS.values(), 434 TokenType.SELECT, 435 } - {TokenType.IDENTIFIER} 436 437 DB_CREATABLES = { 438 TokenType.DATABASE, 439 TokenType.DICTIONARY, 440 TokenType.MODEL, 441 TokenType.NAMESPACE, 442 TokenType.SCHEMA, 443 TokenType.SEQUENCE, 444 TokenType.SINK, 445 TokenType.SOURCE, 446 TokenType.STORAGE_INTEGRATION, 447 TokenType.STREAMLIT, 448 TokenType.TABLE, 449 TokenType.TAG, 450 TokenType.VIEW, 451 TokenType.WAREHOUSE, 452 } 453 454 CREATABLES = { 455 TokenType.COLUMN, 456 TokenType.CONSTRAINT, 457 TokenType.FOREIGN_KEY, 458 TokenType.FUNCTION, 459 TokenType.INDEX, 460 TokenType.PROCEDURE, 461 *DB_CREATABLES, 462 } 463 464 ALTERABLES = { 465 TokenType.INDEX, 466 TokenType.TABLE, 467 TokenType.VIEW, 468 } 469 470 # Tokens that can represent identifiers 471 ID_VAR_TOKENS = { 472 TokenType.ALL, 473 TokenType.ATTACH, 474 TokenType.VAR, 475 TokenType.ANTI, 476 TokenType.APPLY, 477 TokenType.ASC, 478 TokenType.ASOF, 479 TokenType.AUTO_INCREMENT, 480 TokenType.BEGIN, 481 TokenType.BPCHAR, 482 TokenType.CACHE, 483 TokenType.CASE, 484 TokenType.COLLATE, 485 TokenType.COMMAND, 486 TokenType.COMMENT, 487 TokenType.COMMIT, 488 TokenType.CONSTRAINT, 489 TokenType.COPY, 490 TokenType.CUBE, 491 TokenType.CURRENT_SCHEMA, 492 TokenType.DEFAULT, 493 TokenType.DELETE, 494 TokenType.DESC, 495 TokenType.DESCRIBE, 496 TokenType.DETACH, 497 TokenType.DICTIONARY, 498 TokenType.DIV, 499 TokenType.END, 500 TokenType.EXECUTE, 501 TokenType.EXPORT, 502 TokenType.ESCAPE, 503 TokenType.FALSE, 504 TokenType.FIRST, 505 TokenType.FILTER, 506 TokenType.FINAL, 507 TokenType.FORMAT, 508 TokenType.FULL, 509 TokenType.IDENTIFIER, 510 TokenType.IS, 511 TokenType.ISNULL, 512 TokenType.INTERVAL, 513 TokenType.KEEP, 514 TokenType.KILL, 515 TokenType.LEFT, 516 TokenType.LIMIT, 517 TokenType.LOAD, 518 TokenType.MERGE, 519 TokenType.NATURAL, 520 TokenType.NEXT, 521 TokenType.OFFSET, 522 TokenType.OPERATOR, 523 TokenType.ORDINALITY, 524 TokenType.OVERLAPS, 525 TokenType.OVERWRITE, 526 TokenType.PARTITION, 527 TokenType.PERCENT, 528 TokenType.PIVOT, 529 TokenType.PRAGMA, 530 TokenType.RANGE, 531 TokenType.RECURSIVE, 532 TokenType.REFERENCES, 533 TokenType.REFRESH, 534 TokenType.RENAME, 535 TokenType.REPLACE, 536 TokenType.RIGHT, 537 TokenType.ROLLUP, 538 TokenType.ROW, 539 TokenType.ROWS, 540 TokenType.SEMI, 541 TokenType.SET, 542 TokenType.SETTINGS, 543 TokenType.SHOW, 544 TokenType.TEMPORARY, 545 TokenType.TOP, 546 TokenType.TRUE, 547 TokenType.TRUNCATE, 548 TokenType.UNIQUE, 549 TokenType.UNNEST, 550 TokenType.UNPIVOT, 551 TokenType.UPDATE, 552 TokenType.USE, 553 TokenType.VOLATILE, 554 TokenType.WINDOW, 555 *CREATABLES, 556 *SUBQUERY_PREDICATES, 557 *TYPE_TOKENS, 558 *NO_PAREN_FUNCTIONS, 559 } 560 
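# Note: TokenType.UNION enters this set via *TYPE_TOKENS (it appears in
# STRUCT_TYPE_TOKENS for struct-like UNION types), but it must keep acting
# as a set operator, hence the removal on the next line.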
ID_VAR_TOKENS.remove(TokenType.UNION) 561 562 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 563 TokenType.ANTI, 564 TokenType.APPLY, 565 TokenType.ASOF, 566 TokenType.FULL, 567 TokenType.LEFT, 568 TokenType.LOCK, 569 TokenType.NATURAL, 570 TokenType.RIGHT, 571 TokenType.SEMI, 572 TokenType.WINDOW, 573 } 574 575 ALIAS_TOKENS = ID_VAR_TOKENS 576 577 ARRAY_CONSTRUCTORS = { 578 "ARRAY": exp.Array, 579 "LIST": exp.List, 580 } 581 582 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 583 584 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 585 586 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 587 588 FUNC_TOKENS = { 589 TokenType.COLLATE, 590 TokenType.COMMAND, 591 TokenType.CURRENT_DATE, 592 TokenType.CURRENT_DATETIME, 593 TokenType.CURRENT_SCHEMA, 594 TokenType.CURRENT_TIMESTAMP, 595 TokenType.CURRENT_TIME, 596 TokenType.CURRENT_USER, 597 TokenType.FILTER, 598 TokenType.FIRST, 599 TokenType.FORMAT, 600 TokenType.GLOB, 601 TokenType.IDENTIFIER, 602 TokenType.INDEX, 603 TokenType.ISNULL, 604 TokenType.ILIKE, 605 TokenType.INSERT, 606 TokenType.LIKE, 607 TokenType.MERGE, 608 TokenType.NEXT, 609 TokenType.OFFSET, 610 TokenType.PRIMARY_KEY, 611 TokenType.RANGE, 612 TokenType.REPLACE, 613 TokenType.RLIKE, 614 TokenType.ROW, 615 TokenType.UNNEST, 616 TokenType.VAR, 617 TokenType.LEFT, 618 TokenType.RIGHT, 619 TokenType.SEQUENCE, 620 TokenType.DATE, 621 TokenType.DATETIME, 622 TokenType.TABLE, 623 TokenType.TIMESTAMP, 624 TokenType.TIMESTAMPTZ, 625 TokenType.TRUNCATE, 626 TokenType.WINDOW, 627 TokenType.XOR, 628 *TYPE_TOKENS, 629 *SUBQUERY_PREDICATES, 630 } 631 632 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 633 TokenType.AND: exp.And, 634 } 635 636 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 637 TokenType.COLON_EQ: exp.PropertyEQ, 638 } 639 640 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 641 TokenType.OR: exp.Or, 642 } 643 644 EQUALITY = { 645 TokenType.EQ: exp.EQ, 646 TokenType.NEQ: exp.NEQ, 647 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 648 } 649 650 COMPARISON = { 651 TokenType.GT: exp.GT, 652 TokenType.GTE: exp.GTE, 653 TokenType.LT: exp.LT, 654 TokenType.LTE: exp.LTE, 655 } 656 657 BITWISE = { 658 TokenType.AMP: exp.BitwiseAnd, 659 TokenType.CARET: exp.BitwiseXor, 660 TokenType.PIPE: exp.BitwiseOr, 661 } 662 663 TERM = { 664 TokenType.DASH: exp.Sub, 665 TokenType.PLUS: exp.Add, 666 TokenType.MOD: exp.Mod, 667 TokenType.COLLATE: exp.Collate, 668 } 669 670 FACTOR = { 671 TokenType.DIV: exp.IntDiv, 672 TokenType.LR_ARROW: exp.Distance, 673 TokenType.SLASH: exp.Div, 674 TokenType.STAR: exp.Mul, 675 } 676 677 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 678 679 TIMES = { 680 TokenType.TIME, 681 TokenType.TIMETZ, 682 } 683 684 TIMESTAMPS = { 685 TokenType.TIMESTAMP, 686 TokenType.TIMESTAMPTZ, 687 TokenType.TIMESTAMPLTZ, 688 *TIMES, 689 } 690 691 SET_OPERATIONS = { 692 TokenType.UNION, 693 TokenType.INTERSECT, 694 TokenType.EXCEPT, 695 } 696 697 JOIN_METHODS = { 698 TokenType.ASOF, 699 TokenType.NATURAL, 700 TokenType.POSITIONAL, 701 } 702 703 JOIN_SIDES = { 704 TokenType.LEFT, 705 TokenType.RIGHT, 706 TokenType.FULL, 707 } 708 709 JOIN_KINDS = { 710 TokenType.ANTI, 711 TokenType.CROSS, 712 TokenType.INNER, 713 TokenType.OUTER, 714 TokenType.SEMI, 715 TokenType.STRAIGHT_JOIN, 716 } 717 718 JOIN_HINTS: t.Set[str] = set() 719 720 LAMBDAS = { 721 TokenType.ARROW: lambda self, expressions: self.expression( 722 exp.Lambda, 723 this=self._replace_lambda( 724 self._parse_assignment(), 725 expressions, 726 ), 727 expressions=expressions, 728 
), 729 TokenType.FARROW: lambda self, expressions: self.expression( 730 exp.Kwarg, 731 this=exp.var(expressions[0].name), 732 expression=self._parse_assignment(), 733 ), 734 } 735 736 COLUMN_OPERATORS = { 737 TokenType.DOT: None, 738 TokenType.DCOLON: lambda self, this, to: self.expression( 739 exp.Cast if self.STRICT_CAST else exp.TryCast, 740 this=this, 741 to=to, 742 ), 743 TokenType.ARROW: lambda self, this, path: self.expression( 744 exp.JSONExtract, 745 this=this, 746 expression=self.dialect.to_json_path(path), 747 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 748 ), 749 TokenType.DARROW: lambda self, this, path: self.expression( 750 exp.JSONExtractScalar, 751 this=this, 752 expression=self.dialect.to_json_path(path), 753 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 754 ), 755 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 756 exp.JSONBExtract, 757 this=this, 758 expression=path, 759 ), 760 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 761 exp.JSONBExtractScalar, 762 this=this, 763 expression=path, 764 ), 765 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 766 exp.JSONBContains, 767 this=this, 768 expression=key, 769 ), 770 } 771 772 EXPRESSION_PARSERS = { 773 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 774 exp.Column: lambda self: self._parse_column(), 775 exp.Condition: lambda self: self._parse_assignment(), 776 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 777 exp.Expression: lambda self: self._parse_expression(), 778 exp.From: lambda self: self._parse_from(joins=True), 779 exp.Group: lambda self: self._parse_group(), 780 exp.Having: lambda self: self._parse_having(), 781 exp.Hint: lambda self: self._parse_hint_body(), 782 exp.Identifier: lambda self: self._parse_id_var(), 783 exp.Join: lambda self: self._parse_join(), 784 exp.Lambda: lambda self: self._parse_lambda(), 785 exp.Lateral: lambda self: self._parse_lateral(), 786 exp.Limit: lambda self: self._parse_limit(), 787 exp.Offset: lambda self: self._parse_offset(), 788 exp.Order: lambda self: self._parse_order(), 789 exp.Ordered: lambda self: self._parse_ordered(), 790 exp.Properties: lambda self: self._parse_properties(), 791 exp.Qualify: lambda self: self._parse_qualify(), 792 exp.Returning: lambda self: self._parse_returning(), 793 exp.Select: lambda self: self._parse_select(), 794 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 795 exp.Table: lambda self: self._parse_table_parts(), 796 exp.TableAlias: lambda self: self._parse_table_alias(), 797 exp.Tuple: lambda self: self._parse_value(), 798 exp.Whens: lambda self: self._parse_when_matched(), 799 exp.Where: lambda self: self._parse_where(), 800 exp.Window: lambda self: self._parse_named_window(), 801 exp.With: lambda self: self._parse_with(), 802 "JOIN_TYPE": lambda self: self._parse_join_parts(), 803 } 804 805 STATEMENT_PARSERS = { 806 TokenType.ALTER: lambda self: self._parse_alter(), 807 TokenType.ANALYZE: lambda self: self._parse_analyze(), 808 TokenType.BEGIN: lambda self: self._parse_transaction(), 809 TokenType.CACHE: lambda self: self._parse_cache(), 810 TokenType.COMMENT: lambda self: self._parse_comment(), 811 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 812 TokenType.COPY: lambda self: self._parse_copy(), 813 TokenType.CREATE: lambda self: self._parse_create(), 814 TokenType.DELETE: lambda self: self._parse_delete(), 815 TokenType.DESC: lambda self: self._parse_describe(), 816 
TokenType.DESCRIBE: lambda self: self._parse_describe(), 817 TokenType.DROP: lambda self: self._parse_drop(), 818 TokenType.GRANT: lambda self: self._parse_grant(), 819 TokenType.INSERT: lambda self: self._parse_insert(), 820 TokenType.KILL: lambda self: self._parse_kill(), 821 TokenType.LOAD: lambda self: self._parse_load(), 822 TokenType.MERGE: lambda self: self._parse_merge(), 823 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 824 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 825 TokenType.REFRESH: lambda self: self._parse_refresh(), 826 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 827 TokenType.SET: lambda self: self._parse_set(), 828 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 829 TokenType.UNCACHE: lambda self: self._parse_uncache(), 830 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 831 TokenType.UPDATE: lambda self: self._parse_update(), 832 TokenType.USE: lambda self: self.expression( 833 exp.Use, 834 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 835 this=self._parse_table(schema=False), 836 ), 837 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 838 } 839 840 UNARY_PARSERS = { 841 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 842 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 843 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 844 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 845 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 846 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 847 } 848 849 STRING_PARSERS = { 850 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 851 exp.RawString, this=token.text 852 ), 853 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 854 exp.National, this=token.text 855 ), 856 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 857 TokenType.STRING: lambda self, token: self.expression( 858 exp.Literal, this=token.text, is_string=True 859 ), 860 TokenType.UNICODE_STRING: lambda self, token: self.expression( 861 exp.UnicodeString, 862 this=token.text, 863 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 864 ), 865 } 866 867 NUMERIC_PARSERS = { 868 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 869 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 870 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 871 TokenType.NUMBER: lambda self, token: self.expression( 872 exp.Literal, this=token.text, is_string=False 873 ), 874 } 875 876 PRIMARY_PARSERS = { 877 **STRING_PARSERS, 878 **NUMERIC_PARSERS, 879 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 880 TokenType.NULL: lambda self, _: self.expression(exp.Null), 881 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 882 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 883 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 884 TokenType.STAR: lambda self, _: self._parse_star_ops(), 885 } 886 887 PLACEHOLDER_PARSERS = { 888 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 889 TokenType.PARAMETER: lambda self: 
self._parse_parameter(), 890 TokenType.COLON: lambda self: ( 891 self.expression(exp.Placeholder, this=self._prev.text) 892 if self._match_set(self.ID_VAR_TOKENS) 893 else None 894 ), 895 } 896 897 RANGE_PARSERS = { 898 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 899 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 900 TokenType.GLOB: binary_range_parser(exp.Glob), 901 TokenType.ILIKE: binary_range_parser(exp.ILike), 902 TokenType.IN: lambda self, this: self._parse_in(this), 903 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 904 TokenType.IS: lambda self, this: self._parse_is(this), 905 TokenType.LIKE: binary_range_parser(exp.Like), 906 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 907 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 908 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 909 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 910 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 911 } 912 913 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 914 "ALLOWED_VALUES": lambda self: self.expression( 915 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 916 ), 917 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 918 "AUTO": lambda self: self._parse_auto_property(), 919 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 920 "BACKUP": lambda self: self.expression( 921 exp.BackupProperty, this=self._parse_var(any_token=True) 922 ), 923 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 924 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 925 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 926 "CHECKSUM": lambda self: self._parse_checksum(), 927 "CLUSTER BY": lambda self: self._parse_cluster(), 928 "CLUSTERED": lambda self: self._parse_clustered_by(), 929 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 930 exp.CollateProperty, **kwargs 931 ), 932 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 933 "CONTAINS": lambda self: self._parse_contains_property(), 934 "COPY": lambda self: self._parse_copy_property(), 935 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 936 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 937 "DEFINER": lambda self: self._parse_definer(), 938 "DETERMINISTIC": lambda self: self.expression( 939 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 940 ), 941 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 942 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 943 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 944 "DISTKEY": lambda self: self._parse_distkey(), 945 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 946 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 947 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 948 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 949 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 950 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 951 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 952 "FREESPACE": lambda self: self._parse_freespace(), 953 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 954 "HEAP": lambda self: 
self.expression(exp.HeapProperty), 955 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 956 "IMMUTABLE": lambda self: self.expression( 957 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 958 ), 959 "INHERITS": lambda self: self.expression( 960 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 961 ), 962 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 963 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 964 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 965 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 966 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 967 "LIKE": lambda self: self._parse_create_like(), 968 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 969 "LOCK": lambda self: self._parse_locking(), 970 "LOCKING": lambda self: self._parse_locking(), 971 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 972 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 973 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 974 "MODIFIES": lambda self: self._parse_modifies_property(), 975 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 976 "NO": lambda self: self._parse_no_property(), 977 "ON": lambda self: self._parse_on_property(), 978 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 979 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 980 "PARTITION": lambda self: self._parse_partitioned_of(), 981 "PARTITION BY": lambda self: self._parse_partitioned_by(), 982 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 983 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 984 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 985 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 986 "READS": lambda self: self._parse_reads_property(), 987 "REMOTE": lambda self: self._parse_remote_with_connection(), 988 "RETURNS": lambda self: self._parse_returns(), 989 "STRICT": lambda self: self.expression(exp.StrictProperty), 990 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 991 "ROW": lambda self: self._parse_row(), 992 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 993 "SAMPLE": lambda self: self.expression( 994 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 995 ), 996 "SECURE": lambda self: self.expression(exp.SecureProperty), 997 "SECURITY": lambda self: self._parse_security(), 998 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 999 "SETTINGS": lambda self: self._parse_settings_property(), 1000 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1001 "SORTKEY": lambda self: self._parse_sortkey(), 1002 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1003 "STABLE": lambda self: self.expression( 1004 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1005 ), 1006 "STORED": lambda self: self._parse_stored(), 1007 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1008 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1009 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1010 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1011 "TO": lambda self: self._parse_to_table(), 1012 "TRANSIENT": lambda self: 
self.expression(exp.TransientProperty), 1013 "TRANSFORM": lambda self: self.expression( 1014 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1015 ), 1016 "TTL": lambda self: self._parse_ttl(), 1017 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1018 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1019 "VOLATILE": lambda self: self._parse_volatile_property(), 1020 "WITH": lambda self: self._parse_with_property(), 1021 } 1022 1023 CONSTRAINT_PARSERS = { 1024 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1025 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1026 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1027 "CHARACTER SET": lambda self: self.expression( 1028 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1029 ), 1030 "CHECK": lambda self: self.expression( 1031 exp.CheckColumnConstraint, 1032 this=self._parse_wrapped(self._parse_assignment), 1033 enforced=self._match_text_seq("ENFORCED"), 1034 ), 1035 "COLLATE": lambda self: self.expression( 1036 exp.CollateColumnConstraint, 1037 this=self._parse_identifier() or self._parse_column(), 1038 ), 1039 "COMMENT": lambda self: self.expression( 1040 exp.CommentColumnConstraint, this=self._parse_string() 1041 ), 1042 "COMPRESS": lambda self: self._parse_compress(), 1043 "CLUSTERED": lambda self: self.expression( 1044 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1045 ), 1046 "NONCLUSTERED": lambda self: self.expression( 1047 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1048 ), 1049 "DEFAULT": lambda self: self.expression( 1050 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1051 ), 1052 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1053 "EPHEMERAL": lambda self: self.expression( 1054 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1055 ), 1056 "EXCLUDE": lambda self: self.expression( 1057 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1058 ), 1059 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1060 "FORMAT": lambda self: self.expression( 1061 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1062 ), 1063 "GENERATED": lambda self: self._parse_generated_as_identity(), 1064 "IDENTITY": lambda self: self._parse_auto_increment(), 1065 "INLINE": lambda self: self._parse_inline(), 1066 "LIKE": lambda self: self._parse_create_like(), 1067 "NOT": lambda self: self._parse_not_constraint(), 1068 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1069 "ON": lambda self: ( 1070 self._match(TokenType.UPDATE) 1071 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1072 ) 1073 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1074 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1075 "PERIOD": lambda self: self._parse_period_for_system_time(), 1076 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1077 "REFERENCES": lambda self: self._parse_references(match=False), 1078 "TITLE": lambda self: self.expression( 1079 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1080 ), 1081 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1082 "UNIQUE": lambda self: self._parse_unique(), 1083 "UPPERCASE": lambda self: 
self.expression(exp.UppercaseColumnConstraint), 1084 "WATERMARK": lambda self: self.expression( 1085 exp.WatermarkColumnConstraint, 1086 this=self._match(TokenType.FOR) and self._parse_column(), 1087 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1088 ), 1089 "WITH": lambda self: self.expression( 1090 exp.Properties, expressions=self._parse_wrapped_properties() 1091 ), 1092 } 1093 1094 ALTER_PARSERS = { 1095 "ADD": lambda self: self._parse_alter_table_add(), 1096 "AS": lambda self: self._parse_select(), 1097 "ALTER": lambda self: self._parse_alter_table_alter(), 1098 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1099 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1100 "DROP": lambda self: self._parse_alter_table_drop(), 1101 "RENAME": lambda self: self._parse_alter_table_rename(), 1102 "SET": lambda self: self._parse_alter_table_set(), 1103 "SWAP": lambda self: self.expression( 1104 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1105 ), 1106 } 1107 1108 ALTER_ALTER_PARSERS = { 1109 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1110 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1111 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1112 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1113 } 1114 1115 SCHEMA_UNNAMED_CONSTRAINTS = { 1116 "CHECK", 1117 "EXCLUDE", 1118 "FOREIGN KEY", 1119 "LIKE", 1120 "PERIOD", 1121 "PRIMARY KEY", 1122 "UNIQUE", 1123 "WATERMARK", 1124 } 1125 1126 NO_PAREN_FUNCTION_PARSERS = { 1127 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1128 "CASE": lambda self: self._parse_case(), 1129 "CONNECT_BY_ROOT": lambda self: self.expression( 1130 exp.ConnectByRoot, this=self._parse_column() 1131 ), 1132 "IF": lambda self: self._parse_if(), 1133 } 1134 1135 INVALID_FUNC_NAME_TOKENS = { 1136 TokenType.IDENTIFIER, 1137 TokenType.STRING, 1138 } 1139 1140 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1141 1142 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1143 1144 FUNCTION_PARSERS = { 1145 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1146 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1147 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1148 "DECODE": lambda self: self._parse_decode(), 1149 "EXTRACT": lambda self: self._parse_extract(), 1150 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1151 "GAP_FILL": lambda self: self._parse_gap_fill(), 1152 "JSON_OBJECT": lambda self: self._parse_json_object(), 1153 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1154 "JSON_TABLE": lambda self: self._parse_json_table(), 1155 "MATCH": lambda self: self._parse_match_against(), 1156 "NORMALIZE": lambda self: self._parse_normalize(), 1157 "OPENJSON": lambda self: self._parse_open_json(), 1158 "OVERLAY": lambda self: self._parse_overlay(), 1159 "POSITION": lambda self: self._parse_position(), 1160 "PREDICT": lambda self: self._parse_predict(), 1161 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1162 "STRING_AGG": lambda self: self._parse_string_agg(), 1163 "SUBSTRING": lambda self: self._parse_substring(), 1164 "TRIM": lambda self: self._parse_trim(), 1165 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1166 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1167 "XMLELEMENT": lambda self: self.expression( 1168 exp.XMLElement, 1169 this=self._match_text_seq("NAME") and self._parse_id_var(), 1170 
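            # Editor's note (illustrative): matches e.g. XMLELEMENT(NAME root, col1, col2),
            # where NAME introduces the element identifier (Postgres/Oracle syntax)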
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
        ),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
1259 tuple(), 1260 ), 1261 } 1262 1263 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1264 1265 USABLES: OPTIONS_TYPE = dict.fromkeys( 1266 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1267 ) 1268 1269 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1270 1271 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1272 "TYPE": ("EVOLUTION",), 1273 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1274 } 1275 1276 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1277 1278 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1279 1280 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1281 "NOT": ("ENFORCED",), 1282 "MATCH": ( 1283 "FULL", 1284 "PARTIAL", 1285 "SIMPLE", 1286 ), 1287 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1288 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1289 } 1290 1291 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1292 1293 CLONE_KEYWORDS = {"CLONE", "COPY"} 1294 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1295 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1296 1297 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1298 1299 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1300 1301 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1302 1303 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1304 1305 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1306 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1307 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1308 1309 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1310 1311 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1312 1313 ADD_CONSTRAINT_TOKENS = { 1314 TokenType.CONSTRAINT, 1315 TokenType.FOREIGN_KEY, 1316 TokenType.INDEX, 1317 TokenType.KEY, 1318 TokenType.PRIMARY_KEY, 1319 TokenType.UNIQUE, 1320 } 1321 1322 DISTINCT_TOKENS = {TokenType.DISTINCT} 1323 1324 NULL_TOKENS = {TokenType.NULL} 1325 1326 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1327 1328 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1329 1330 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1331 1332 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1333 1334 ODBC_DATETIME_LITERALS = { 1335 "d": exp.Date, 1336 "t": exp.Time, 1337 "ts": exp.Timestamp, 1338 } 1339 1340 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1341 1342 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1343 1344 # The style options for the DESCRIBE statement 1345 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1346 1347 # The style options for the ANALYZE statement 1348 ANALYZE_STYLES = { 1349 "BUFFER_USAGE_LIMIT", 1350 "FULL", 1351 "LOCAL", 1352 "NO_WRITE_TO_BINLOG", 1353 "SAMPLE", 1354 "SKIP_LOCKED", 1355 "VERBOSE", 1356 } 1357 1358 ANALYZE_EXPRESSION_PARSERS = { 1359 "ALL": lambda self: self._parse_analyze_columns(), 1360 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1361 "DELETE": lambda self: self._parse_analyze_delete(), 1362 "DROP": lambda self: self._parse_analyze_histogram(), 1363 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1364 "LIST": lambda self: self._parse_analyze_list(), 1365 "PREDICATE": lambda self: self._parse_analyze_columns(), 1366 "UPDATE": lambda self: self._parse_analyze_histogram(), 1367 
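        # Editor's note (illustrative): keyed on the word following ANALYZE, e.g. MySQL's
        # ANALYZE TABLE t UPDATE HISTOGRAM ON c or Oracle's ANALYZE TABLE t VALIDATE STRUCTURE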
"VALIDATE": lambda self: self._parse_analyze_validate(), 1368 } 1369 1370 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1371 1372 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1373 1374 OPERATION_MODIFIERS: t.Set[str] = set() 1375 1376 STRICT_CAST = True 1377 1378 PREFIXED_PIVOT_COLUMNS = False 1379 IDENTIFY_PIVOT_STRINGS = False 1380 1381 LOG_DEFAULTS_TO_LN = False 1382 1383 # Whether ADD is present for each column added by ALTER TABLE 1384 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1385 1386 # Whether the table sample clause expects CSV syntax 1387 TABLESAMPLE_CSV = False 1388 1389 # The default method used for table sampling 1390 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1391 1392 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1393 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1394 1395 # Whether the TRIM function expects the characters to trim as its first argument 1396 TRIM_PATTERN_FIRST = False 1397 1398 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1399 STRING_ALIASES = False 1400 1401 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1402 MODIFIERS_ATTACHED_TO_SET_OP = True 1403 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1404 1405 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1406 NO_PAREN_IF_COMMANDS = True 1407 1408 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1409 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1410 1411 # Whether the `:` operator is used to extract a value from a VARIANT column 1412 COLON_IS_VARIANT_EXTRACT = False 1413 1414 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1415 # If this is True and '(' is not found, the keyword will be treated as an identifier 1416 VALUES_FOLLOWED_BY_PAREN = True 1417 1418 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1419 SUPPORTS_IMPLICIT_UNNEST = False 1420 1421 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1422 INTERVAL_SPANS = True 1423 1424 # Whether a PARTITION clause can follow a table reference 1425 SUPPORTS_PARTITION_SELECTION = False 1426 1427 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1428 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1429 1430 # Whether the 'AS' keyword is optional in the CTE definition syntax 1431 OPTIONAL_ALIAS_TOKEN_CTE = True 1432 1433 __slots__ = ( 1434 "error_level", 1435 "error_message_context", 1436 "max_errors", 1437 "dialect", 1438 "sql", 1439 "errors", 1440 "_tokens", 1441 "_index", 1442 "_curr", 1443 "_next", 1444 "_prev", 1445 "_prev_comments", 1446 ) 1447 1448 # Autofilled 1449 SHOW_TRIE: t.Dict = {} 1450 SET_TRIE: t.Dict = {} 1451 1452 def __init__( 1453 self, 1454 error_level: t.Optional[ErrorLevel] = None, 1455 error_message_context: int = 100, 1456 max_errors: int = 3, 1457 dialect: DialectType = None, 1458 ): 1459 from sqlglot.dialects import Dialect 1460 1461 self.error_level = error_level or ErrorLevel.IMMEDIATE 1462 self.error_message_context = error_message_context 1463 self.max_errors = max_errors 1464 self.dialect = Dialect.get_or_raise(dialect) 1465 self.reset() 1466 1467 def reset(self): 1468 self.sql = "" 1469 self.errors = [] 1470 self._tokens = [] 1471 self._index = 0 1472 self._curr = None 1473 self._next = None 1474 self._prev = None 1475 self._prev_comments = None 1476 1477 def parse( 1478 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1479 ) -> t.List[t.Optional[exp.Expression]]: 1480 """ 1481 Parses a list of tokens and returns a list of syntax trees, one tree 1482 per parsed SQL statement. 1483 1484 Args: 1485 raw_tokens: The list of tokens. 1486 sql: The original SQL string, used to produce helpful debug messages. 1487 1488 Returns: 1489 The list of the produced syntax trees. 1490 """ 1491 return self._parse( 1492 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1493 ) 1494 1495 def parse_into( 1496 self, 1497 expression_types: exp.IntoType, 1498 raw_tokens: t.List[Token], 1499 sql: t.Optional[str] = None, 1500 ) -> t.List[t.Optional[exp.Expression]]: 1501 """ 1502 Parses a list of tokens into a given Expression type. If a collection of Expression 1503 types is given instead, this method will try to parse the token list into each one 1504 of them, stopping at the first for which the parsing succeeds. 1505 1506 Args: 1507 expression_types: The expression type(s) to try and parse the token list into. 1508 raw_tokens: The list of tokens. 1509 sql: The original SQL string, used to produce helpful debug messages. 1510 1511 Returns: 1512 The target Expression. 
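        Example (editor's illustration, not in the original docstring; assumes the
        default dialect, with Tokenizer coming from sqlglot.tokens):
            tokens = Tokenizer().tokenize("SELECT a FROM t")
            select = Parser().parse_into(exp.Select, tokens, "SELECT a FROM t")[0]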
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
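        Example (editor's illustration, not in the original docstring): inside a parse
        method a node is typically built as
            self.expression(exp.Not, this=self._parse_equality())
        which also attaches any pending comments and validates mandatory arguments.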
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
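    # Editor's example (illustrative, not part of the original source): parse() splits
    # the token stream on semicolons and runs _parse_statement() on each chunk, e.g.
    #
    #   sql = "SELECT 1; DROP TABLE t"
    #   Parser().parse(Tokenizer().tokenize(sql), sql)  # -> [exp.Select, exp.Drop]
    #
    # SELECT has no STATEMENT_PARSERS entry and takes the expression path above, while
    # DROP dispatches to _parse_drop() below.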
1803 1804 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1805 start = self._prev 1806 temporary = self._match(TokenType.TEMPORARY) 1807 materialized = self._match_text_seq("MATERIALIZED") 1808 1809 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1810 if not kind: 1811 return self._parse_as_command(start) 1812 1813 concurrently = self._match_text_seq("CONCURRENTLY") 1814 if_exists = exists or self._parse_exists() 1815 1816 if kind == "COLUMN": 1817 this = self._parse_column() 1818 else: 1819 this = self._parse_table_parts( 1820 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1821 ) 1822 1823 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1824 1825 if self._match(TokenType.L_PAREN, advance=False): 1826 expressions = self._parse_wrapped_csv(self._parse_types) 1827 else: 1828 expressions = None 1829 1830 return self.expression( 1831 exp.Drop, 1832 exists=if_exists, 1833 this=this, 1834 expressions=expressions, 1835 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1836 temporary=temporary, 1837 materialized=materialized, 1838 cascade=self._match_text_seq("CASCADE"), 1839 constraints=self._match_text_seq("CONSTRAINTS"), 1840 purge=self._match_text_seq("PURGE"), 1841 cluster=cluster, 1842 concurrently=concurrently, 1843 ) 1844 1845 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1846 return ( 1847 self._match_text_seq("IF") 1848 and (not not_ or self._match(TokenType.NOT)) 1849 and self._match(TokenType.EXISTS) 1850 ) 1851 1852 def _parse_create(self) -> exp.Create | exp.Command: 1853 # Note: this can't be None because we've matched a statement parser 1854 start = self._prev 1855 1856 replace = ( 1857 start.token_type == TokenType.REPLACE 1858 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1859 or self._match_pair(TokenType.OR, TokenType.ALTER) 1860 ) 1861 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1862 1863 unique = self._match(TokenType.UNIQUE) 1864 1865 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1866 clustered = True 1867 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1868 "COLUMNSTORE" 1869 ): 1870 clustered = False 1871 else: 1872 clustered = None 1873 1874 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1875 self._advance() 1876 1877 properties = None 1878 create_token = self._match_set(self.CREATABLES) and self._prev 1879 1880 if not create_token: 1881 # exp.Properties.Location.POST_CREATE 1882 properties = self._parse_properties() 1883 create_token = self._match_set(self.CREATABLES) and self._prev 1884 1885 if not properties or not create_token: 1886 return self._parse_as_command(start) 1887 1888 concurrently = self._match_text_seq("CONCURRENTLY") 1889 exists = self._parse_exists(not_=True) 1890 this = None 1891 expression: t.Optional[exp.Expression] = None 1892 indexes = None 1893 no_schema_binding = None 1894 begin = None 1895 end = None 1896 clone = None 1897 1898 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1899 nonlocal properties 1900 if properties and temp_props: 1901 properties.expressions.extend(temp_props.expressions) 1902 elif temp_props: 1903 properties = temp_props 1904 1905 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1906 this = self._parse_user_defined_function(kind=create_token.token_type) 1907 1908 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1909 
extend_props(self._parse_properties()) 1910 1911 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1912 extend_props(self._parse_properties()) 1913 1914 if not expression: 1915 if self._match(TokenType.COMMAND): 1916 expression = self._parse_as_command(self._prev) 1917 else: 1918 begin = self._match(TokenType.BEGIN) 1919 return_ = self._match_text_seq("RETURN") 1920 1921 if self._match(TokenType.STRING, advance=False): 1922 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1923 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1924 expression = self._parse_string() 1925 extend_props(self._parse_properties()) 1926 else: 1927 expression = self._parse_user_defined_function_expression() 1928 1929 end = self._match_text_seq("END") 1930 1931 if return_: 1932 expression = self.expression(exp.Return, this=expression) 1933 elif create_token.token_type == TokenType.INDEX: 1934 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1935 if not self._match(TokenType.ON): 1936 index = self._parse_id_var() 1937 anonymous = False 1938 else: 1939 index = None 1940 anonymous = True 1941 1942 this = self._parse_index(index=index, anonymous=anonymous) 1943 elif create_token.token_type in self.DB_CREATABLES: 1944 table_parts = self._parse_table_parts( 1945 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1946 ) 1947 1948 # exp.Properties.Location.POST_NAME 1949 self._match(TokenType.COMMA) 1950 extend_props(self._parse_properties(before=True)) 1951 1952 this = self._parse_schema(this=table_parts) 1953 1954 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1955 extend_props(self._parse_properties()) 1956 1957 self._match(TokenType.ALIAS) 1958 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1959 # exp.Properties.Location.POST_ALIAS 1960 extend_props(self._parse_properties()) 1961 1962 if create_token.token_type == TokenType.SEQUENCE: 1963 expression = self._parse_types() 1964 extend_props(self._parse_properties()) 1965 else: 1966 expression = self._parse_ddl_select() 1967 1968 if create_token.token_type == TokenType.TABLE: 1969 # exp.Properties.Location.POST_EXPRESSION 1970 extend_props(self._parse_properties()) 1971 1972 indexes = [] 1973 while True: 1974 index = self._parse_index() 1975 1976 # exp.Properties.Location.POST_INDEX 1977 extend_props(self._parse_properties()) 1978 if not index: 1979 break 1980 else: 1981 self._match(TokenType.COMMA) 1982 indexes.append(index) 1983 elif create_token.token_type == TokenType.VIEW: 1984 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1985 no_schema_binding = True 1986 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 1987 extend_props(self._parse_properties()) 1988 1989 shallow = self._match_text_seq("SHALLOW") 1990 1991 if self._match_texts(self.CLONE_KEYWORDS): 1992 copy = self._prev.text.lower() == "copy" 1993 clone = self.expression( 1994 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1995 ) 1996 1997 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1998 return self._parse_as_command(start) 1999 2000 create_kind_text = create_token.text.upper() 2001 return self.expression( 2002 exp.Create, 2003 this=this, 2004 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2005 replace=replace, 2006 refresh=refresh, 2007 unique=unique, 2008 expression=expression, 
2009 exists=exists, 2010 properties=properties, 2011 indexes=indexes, 2012 no_schema_binding=no_schema_binding, 2013 begin=begin, 2014 end=end, 2015 clone=clone, 2016 concurrently=concurrently, 2017 clustered=clustered, 2018 ) 2019 2020 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2021 seq = exp.SequenceProperties() 2022 2023 options = [] 2024 index = self._index 2025 2026 while self._curr: 2027 self._match(TokenType.COMMA) 2028 if self._match_text_seq("INCREMENT"): 2029 self._match_text_seq("BY") 2030 self._match_text_seq("=") 2031 seq.set("increment", self._parse_term()) 2032 elif self._match_text_seq("MINVALUE"): 2033 seq.set("minvalue", self._parse_term()) 2034 elif self._match_text_seq("MAXVALUE"): 2035 seq.set("maxvalue", self._parse_term()) 2036 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2037 self._match_text_seq("=") 2038 seq.set("start", self._parse_term()) 2039 elif self._match_text_seq("CACHE"): 2040 # T-SQL allows empty CACHE which is initialized dynamically 2041 seq.set("cache", self._parse_number() or True) 2042 elif self._match_text_seq("OWNED", "BY"): 2043 # "OWNED BY NONE" is the default 2044 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2045 else: 2046 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2047 if opt: 2048 options.append(opt) 2049 else: 2050 break 2051 2052 seq.set("options", options if options else None) 2053 return None if self._index == index else seq 2054 2055 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2056 # only used for teradata currently 2057 self._match(TokenType.COMMA) 2058 2059 kwargs = { 2060 "no": self._match_text_seq("NO"), 2061 "dual": self._match_text_seq("DUAL"), 2062 "before": self._match_text_seq("BEFORE"), 2063 "default": self._match_text_seq("DEFAULT"), 2064 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2065 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2066 "after": self._match_text_seq("AFTER"), 2067 "minimum": self._match_texts(("MIN", "MINIMUM")), 2068 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2069 } 2070 2071 if self._match_texts(self.PROPERTY_PARSERS): 2072 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2073 try: 2074 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2075 except TypeError: 2076 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2077 2078 return None 2079 2080 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2081 return self._parse_wrapped_csv(self._parse_property) 2082 2083 def _parse_property(self) -> t.Optional[exp.Expression]: 2084 if self._match_texts(self.PROPERTY_PARSERS): 2085 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2086 2087 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2088 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2089 2090 if self._match_text_seq("COMPOUND", "SORTKEY"): 2091 return self._parse_sortkey(compound=True) 2092 2093 if self._match_text_seq("SQL", "SECURITY"): 2094 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2095 2096 index = self._index 2097 key = self._parse_column() 2098 2099 if not self._match(TokenType.EQ): 2100 self._retreat(index) 2101 return self._parse_sequence_properties() 2102 2103 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2104 if isinstance(key, exp.Column): 2105 key = 
key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2106 2107 value = self._parse_bitwise() or self._parse_var(any_token=True) 2108 2109 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2110 if isinstance(value, exp.Column): 2111 value = exp.var(value.name) 2112 2113 return self.expression(exp.Property, this=key, value=value) 2114 2115 def _parse_stored(self) -> exp.FileFormatProperty: 2116 self._match(TokenType.ALIAS) 2117 2118 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2119 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2120 2121 return self.expression( 2122 exp.FileFormatProperty, 2123 this=( 2124 self.expression( 2125 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2126 ) 2127 if input_format or output_format 2128 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2129 ), 2130 ) 2131 2132 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2133 field = self._parse_field() 2134 if isinstance(field, exp.Identifier) and not field.quoted: 2135 field = exp.var(field) 2136 2137 return field 2138 2139 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2140 self._match(TokenType.EQ) 2141 self._match(TokenType.ALIAS) 2142 2143 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2144 2145 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2146 properties = [] 2147 while True: 2148 if before: 2149 prop = self._parse_property_before() 2150 else: 2151 prop = self._parse_property() 2152 if not prop: 2153 break 2154 for p in ensure_list(prop): 2155 properties.append(p) 2156 2157 if properties: 2158 return self.expression(exp.Properties, expressions=properties) 2159 2160 return None 2161 2162 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2163 return self.expression( 2164 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2165 ) 2166 2167 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2168 if self._match_texts(("DEFINER", "INVOKER")): 2169 security_specifier = self._prev.text.upper() 2170 return self.expression(exp.SecurityProperty, this=security_specifier) 2171 return None 2172 2173 def _parse_settings_property(self) -> exp.SettingsProperty: 2174 return self.expression( 2175 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2176 ) 2177 2178 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2179 if self._index >= 2: 2180 pre_volatile_token = self._tokens[self._index - 2] 2181 else: 2182 pre_volatile_token = None 2183 2184 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2185 return exp.VolatileProperty() 2186 2187 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2188 2189 def _parse_retention_period(self) -> exp.Var: 2190 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2191 number = self._parse_number() 2192 number_str = f"{number} " if number else "" 2193 unit = self._parse_var(any_token=True) 2194 return exp.var(f"{number_str}{unit}") 2195 2196 def _parse_system_versioning_property( 2197 self, with_: bool = False 2198 ) -> exp.WithSystemVersioningProperty: 2199 self._match(TokenType.EQ) 2200 prop = self.expression( 2201 exp.WithSystemVersioningProperty, 2202 **{ # type: ignore 2203 "on": 
True, 2204 "with": with_, 2205 }, 2206 ) 2207 2208 if self._match_text_seq("OFF"): 2209 prop.set("on", False) 2210 return prop 2211 2212 self._match(TokenType.ON) 2213 if self._match(TokenType.L_PAREN): 2214 while self._curr and not self._match(TokenType.R_PAREN): 2215 if self._match_text_seq("HISTORY_TABLE", "="): 2216 prop.set("this", self._parse_table_parts()) 2217 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2218 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2219 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2220 prop.set("retention_period", self._parse_retention_period()) 2221 2222 self._match(TokenType.COMMA) 2223 2224 return prop 2225 2226 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2227 self._match(TokenType.EQ) 2228 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2229 prop = self.expression(exp.DataDeletionProperty, on=on) 2230 2231 if self._match(TokenType.L_PAREN): 2232 while self._curr and not self._match(TokenType.R_PAREN): 2233 if self._match_text_seq("FILTER_COLUMN", "="): 2234 prop.set("filter_column", self._parse_column()) 2235 elif self._match_text_seq("RETENTION_PERIOD", "="): 2236 prop.set("retention_period", self._parse_retention_period()) 2237 2238 self._match(TokenType.COMMA) 2239 2240 return prop 2241 2242 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2243 kind = "HASH" 2244 expressions: t.Optional[t.List[exp.Expression]] = None 2245 if self._match_text_seq("BY", "HASH"): 2246 expressions = self._parse_wrapped_csv(self._parse_id_var) 2247 elif self._match_text_seq("BY", "RANDOM"): 2248 kind = "RANDOM" 2249 2250 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2251 buckets: t.Optional[exp.Expression] = None 2252 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2253 buckets = self._parse_number() 2254 2255 return self.expression( 2256 exp.DistributedByProperty, 2257 expressions=expressions, 2258 kind=kind, 2259 buckets=buckets, 2260 order=self._parse_order(), 2261 ) 2262 2263 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2264 self._match_text_seq("KEY") 2265 expressions = self._parse_wrapped_id_vars() 2266 return self.expression(expr_type, expressions=expressions) 2267 2268 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2269 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2270 prop = self._parse_system_versioning_property(with_=True) 2271 self._match_r_paren() 2272 return prop 2273 2274 if self._match(TokenType.L_PAREN, advance=False): 2275 return self._parse_wrapped_properties() 2276 2277 if self._match_text_seq("JOURNAL"): 2278 return self._parse_withjournaltable() 2279 2280 if self._match_texts(self.VIEW_ATTRIBUTES): 2281 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2282 2283 if self._match_text_seq("DATA"): 2284 return self._parse_withdata(no=False) 2285 elif self._match_text_seq("NO", "DATA"): 2286 return self._parse_withdata(no=True) 2287 2288 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2289 return self._parse_serde_properties(with_=True) 2290 2291 if self._match(TokenType.SCHEMA): 2292 return self.expression( 2293 exp.WithSchemaBindingProperty, 2294 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2295 ) 2296 2297 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2298 return self.expression( 2299 exp.WithProcedureOptions, 
expressions=self._parse_csv(self._parse_procedure_option) 2300 ) 2301 2302 if not self._next: 2303 return None 2304 2305 return self._parse_withisolatedloading() 2306 2307 def _parse_procedure_option(self) -> exp.Expression | None: 2308 if self._match_text_seq("EXECUTE", "AS"): 2309 return self.expression( 2310 exp.ExecuteAsProperty, 2311 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2312 or self._parse_string(), 2313 ) 2314 2315 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2316 2317 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2318 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2319 self._match(TokenType.EQ) 2320 2321 user = self._parse_id_var() 2322 self._match(TokenType.PARAMETER) 2323 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2324 2325 if not user or not host: 2326 return None 2327 2328 return exp.DefinerProperty(this=f"{user}@{host}") 2329 2330 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2331 self._match(TokenType.TABLE) 2332 self._match(TokenType.EQ) 2333 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2334 2335 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2336 return self.expression(exp.LogProperty, no=no) 2337 2338 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2339 return self.expression(exp.JournalProperty, **kwargs) 2340 2341 def _parse_checksum(self) -> exp.ChecksumProperty: 2342 self._match(TokenType.EQ) 2343 2344 on = None 2345 if self._match(TokenType.ON): 2346 on = True 2347 elif self._match_text_seq("OFF"): 2348 on = False 2349 2350 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2351 2352 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2353 return self.expression( 2354 exp.Cluster, 2355 expressions=( 2356 self._parse_wrapped_csv(self._parse_ordered) 2357 if wrapped 2358 else self._parse_csv(self._parse_ordered) 2359 ), 2360 ) 2361 2362 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2363 self._match_text_seq("BY") 2364 2365 self._match_l_paren() 2366 expressions = self._parse_csv(self._parse_column) 2367 self._match_r_paren() 2368 2369 if self._match_text_seq("SORTED", "BY"): 2370 self._match_l_paren() 2371 sorted_by = self._parse_csv(self._parse_ordered) 2372 self._match_r_paren() 2373 else: 2374 sorted_by = None 2375 2376 self._match(TokenType.INTO) 2377 buckets = self._parse_number() 2378 self._match_text_seq("BUCKETS") 2379 2380 return self.expression( 2381 exp.ClusteredByProperty, 2382 expressions=expressions, 2383 sorted_by=sorted_by, 2384 buckets=buckets, 2385 ) 2386 2387 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2388 if not self._match_text_seq("GRANTS"): 2389 self._retreat(self._index - 1) 2390 return None 2391 2392 return self.expression(exp.CopyGrantsProperty) 2393 2394 def _parse_freespace(self) -> exp.FreespaceProperty: 2395 self._match(TokenType.EQ) 2396 return self.expression( 2397 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2398 ) 2399 2400 def _parse_mergeblockratio( 2401 self, no: bool = False, default: bool = False 2402 ) -> exp.MergeBlockRatioProperty: 2403 if self._match(TokenType.EQ): 2404 return self.expression( 2405 exp.MergeBlockRatioProperty, 2406 this=self._parse_number(), 2407 percent=self._match(TokenType.PERCENT), 2408 ) 2409 2410 return self.expression(exp.MergeBlockRatioProperty, no=no, 
default=default) 2411 2412 def _parse_datablocksize( 2413 self, 2414 default: t.Optional[bool] = None, 2415 minimum: t.Optional[bool] = None, 2416 maximum: t.Optional[bool] = None, 2417 ) -> exp.DataBlocksizeProperty: 2418 self._match(TokenType.EQ) 2419 size = self._parse_number() 2420 2421 units = None 2422 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2423 units = self._prev.text 2424 2425 return self.expression( 2426 exp.DataBlocksizeProperty, 2427 size=size, 2428 units=units, 2429 default=default, 2430 minimum=minimum, 2431 maximum=maximum, 2432 ) 2433 2434 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2435 self._match(TokenType.EQ) 2436 always = self._match_text_seq("ALWAYS") 2437 manual = self._match_text_seq("MANUAL") 2438 never = self._match_text_seq("NEVER") 2439 default = self._match_text_seq("DEFAULT") 2440 2441 autotemp = None 2442 if self._match_text_seq("AUTOTEMP"): 2443 autotemp = self._parse_schema() 2444 2445 return self.expression( 2446 exp.BlockCompressionProperty, 2447 always=always, 2448 manual=manual, 2449 never=never, 2450 default=default, 2451 autotemp=autotemp, 2452 ) 2453 2454 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2455 index = self._index 2456 no = self._match_text_seq("NO") 2457 concurrent = self._match_text_seq("CONCURRENT") 2458 2459 if not self._match_text_seq("ISOLATED", "LOADING"): 2460 self._retreat(index) 2461 return None 2462 2463 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2464 return self.expression( 2465 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2466 ) 2467 2468 def _parse_locking(self) -> exp.LockingProperty: 2469 if self._match(TokenType.TABLE): 2470 kind = "TABLE" 2471 elif self._match(TokenType.VIEW): 2472 kind = "VIEW" 2473 elif self._match(TokenType.ROW): 2474 kind = "ROW" 2475 elif self._match_text_seq("DATABASE"): 2476 kind = "DATABASE" 2477 else: 2478 kind = None 2479 2480 if kind in ("DATABASE", "TABLE", "VIEW"): 2481 this = self._parse_table_parts() 2482 else: 2483 this = None 2484 2485 if self._match(TokenType.FOR): 2486 for_or_in = "FOR" 2487 elif self._match(TokenType.IN): 2488 for_or_in = "IN" 2489 else: 2490 for_or_in = None 2491 2492 if self._match_text_seq("ACCESS"): 2493 lock_type = "ACCESS" 2494 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2495 lock_type = "EXCLUSIVE" 2496 elif self._match_text_seq("SHARE"): 2497 lock_type = "SHARE" 2498 elif self._match_text_seq("READ"): 2499 lock_type = "READ" 2500 elif self._match_text_seq("WRITE"): 2501 lock_type = "WRITE" 2502 elif self._match_text_seq("CHECKSUM"): 2503 lock_type = "CHECKSUM" 2504 else: 2505 lock_type = None 2506 2507 override = self._match_text_seq("OVERRIDE") 2508 2509 return self.expression( 2510 exp.LockingProperty, 2511 this=this, 2512 kind=kind, 2513 for_or_in=for_or_in, 2514 lock_type=lock_type, 2515 override=override, 2516 ) 2517 2518 def _parse_partition_by(self) -> t.List[exp.Expression]: 2519 if self._match(TokenType.PARTITION_BY): 2520 return self._parse_csv(self._parse_assignment) 2521 return [] 2522 2523 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2524 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2525 if self._match_text_seq("MINVALUE"): 2526 return exp.var("MINVALUE") 2527 if self._match_text_seq("MAXVALUE"): 2528 return exp.var("MAXVALUE") 2529 return self._parse_bitwise() 2530 2531 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2532 expression = None 
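        # Editor's note (illustrative): the branches below handle Postgres partition bound
        # specs such as IN ('a', 'b'), FROM (MINVALUE) TO (100), and
        # WITH (MODULUS 4, REMAINDER 0).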
2533 from_expressions = None 2534 to_expressions = None 2535 2536 if self._match(TokenType.IN): 2537 this = self._parse_wrapped_csv(self._parse_bitwise) 2538 elif self._match(TokenType.FROM): 2539 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2540 self._match_text_seq("TO") 2541 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2542 elif self._match_text_seq("WITH", "(", "MODULUS"): 2543 this = self._parse_number() 2544 self._match_text_seq(",", "REMAINDER") 2545 expression = self._parse_number() 2546 self._match_r_paren() 2547 else: 2548 self.raise_error("Failed to parse partition bound spec.") 2549 2550 return self.expression( 2551 exp.PartitionBoundSpec, 2552 this=this, 2553 expression=expression, 2554 from_expressions=from_expressions, 2555 to_expressions=to_expressions, 2556 ) 2557 2558 # https://www.postgresql.org/docs/current/sql-createtable.html 2559 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2560 if not self._match_text_seq("OF"): 2561 self._retreat(self._index - 1) 2562 return None 2563 2564 this = self._parse_table(schema=True) 2565 2566 if self._match(TokenType.DEFAULT): 2567 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2568 elif self._match_text_seq("FOR", "VALUES"): 2569 expression = self._parse_partition_bound_spec() 2570 else: 2571 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2572 2573 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2574 2575 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2576 self._match(TokenType.EQ) 2577 return self.expression( 2578 exp.PartitionedByProperty, 2579 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2580 ) 2581 2582 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2583 if self._match_text_seq("AND", "STATISTICS"): 2584 statistics = True 2585 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2586 statistics = False 2587 else: 2588 statistics = None 2589 2590 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2591 2592 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2593 if self._match_text_seq("SQL"): 2594 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2595 return None 2596 2597 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2598 if self._match_text_seq("SQL", "DATA"): 2599 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2600 return None 2601 2602 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2603 if self._match_text_seq("PRIMARY", "INDEX"): 2604 return exp.NoPrimaryIndexProperty() 2605 if self._match_text_seq("SQL"): 2606 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2607 return None 2608 2609 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2610 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2611 return exp.OnCommitProperty() 2612 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2613 return exp.OnCommitProperty(delete=True) 2614 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2615 2616 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2617 if self._match_text_seq("SQL", "DATA"): 2618 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2619 return None 2620 2621 def _parse_distkey(self) -> exp.DistKeyProperty: 2622 return self.expression(exp.DistKeyProperty, 
this=self._parse_wrapped(self._parse_id_var)) 2623 2624 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2625 table = self._parse_table(schema=True) 2626 2627 options = [] 2628 while self._match_texts(("INCLUDING", "EXCLUDING")): 2629 this = self._prev.text.upper() 2630 2631 id_var = self._parse_id_var() 2632 if not id_var: 2633 return None 2634 2635 options.append( 2636 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2637 ) 2638 2639 return self.expression(exp.LikeProperty, this=table, expressions=options) 2640 2641 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2642 return self.expression( 2643 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2644 ) 2645 2646 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2647 self._match(TokenType.EQ) 2648 return self.expression( 2649 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2650 ) 2651 2652 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2653 self._match_text_seq("WITH", "CONNECTION") 2654 return self.expression( 2655 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2656 ) 2657 2658 def _parse_returns(self) -> exp.ReturnsProperty: 2659 value: t.Optional[exp.Expression] 2660 null = None 2661 is_table = self._match(TokenType.TABLE) 2662 2663 if is_table: 2664 if self._match(TokenType.LT): 2665 value = self.expression( 2666 exp.Schema, 2667 this="TABLE", 2668 expressions=self._parse_csv(self._parse_struct_types), 2669 ) 2670 if not self._match(TokenType.GT): 2671 self.raise_error("Expecting >") 2672 else: 2673 value = self._parse_schema(exp.var("TABLE")) 2674 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2675 null = True 2676 value = None 2677 else: 2678 value = self._parse_types() 2679 2680 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2681 2682 def _parse_describe(self) -> exp.Describe: 2683 kind = self._match_set(self.CREATABLES) and self._prev.text 2684 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2685 if self._match(TokenType.DOT): 2686 style = None 2687 self._retreat(self._index - 2) 2688 2689 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2690 2691 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2692 this = self._parse_statement() 2693 else: 2694 this = self._parse_table(schema=True) 2695 2696 properties = self._parse_properties() 2697 expressions = properties.expressions if properties else None 2698 partition = self._parse_partition() 2699 return self.expression( 2700 exp.Describe, 2701 this=this, 2702 style=style, 2703 kind=kind, 2704 expressions=expressions, 2705 partition=partition, 2706 format=format, 2707 ) 2708 2709 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2710 kind = self._prev.text.upper() 2711 expressions = [] 2712 2713 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2714 if self._match(TokenType.WHEN): 2715 expression = self._parse_disjunction() 2716 self._match(TokenType.THEN) 2717 else: 2718 expression = None 2719 2720 else_ = self._match(TokenType.ELSE) 2721 2722 if not self._match(TokenType.INTO): 2723 return None 2724 2725 return self.expression( 2726 exp.ConditionalInsert, 2727 this=self.expression( 2728 exp.Insert, 2729 this=self._parse_table(schema=True), 2730 
expression=self._parse_derived_table_values(), 2731 ), 2732 expression=expression, 2733 else_=else_, 2734 ) 2735 2736 expression = parse_conditional_insert() 2737 while expression is not None: 2738 expressions.append(expression) 2739 expression = parse_conditional_insert() 2740 2741 return self.expression( 2742 exp.MultitableInserts, 2743 kind=kind, 2744 comments=comments, 2745 expressions=expressions, 2746 source=self._parse_table(), 2747 ) 2748 2749 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2750 comments = [] 2751 hint = self._parse_hint() 2752 overwrite = self._match(TokenType.OVERWRITE) 2753 ignore = self._match(TokenType.IGNORE) 2754 local = self._match_text_seq("LOCAL") 2755 alternative = None 2756 is_function = None 2757 2758 if self._match_text_seq("DIRECTORY"): 2759 this: t.Optional[exp.Expression] = self.expression( 2760 exp.Directory, 2761 this=self._parse_var_or_string(), 2762 local=local, 2763 row_format=self._parse_row_format(match_row=True), 2764 ) 2765 else: 2766 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2767 comments += ensure_list(self._prev_comments) 2768 return self._parse_multitable_inserts(comments) 2769 2770 if self._match(TokenType.OR): 2771 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2772 2773 self._match(TokenType.INTO) 2774 comments += ensure_list(self._prev_comments) 2775 self._match(TokenType.TABLE) 2776 is_function = self._match(TokenType.FUNCTION) 2777 2778 this = ( 2779 self._parse_table(schema=True, parse_partition=True) 2780 if not is_function 2781 else self._parse_function() 2782 ) 2783 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2784 this.set("alias", self._parse_table_alias()) 2785 2786 returning = self._parse_returning() 2787 2788 return self.expression( 2789 exp.Insert, 2790 comments=comments, 2791 hint=hint, 2792 is_function=is_function, 2793 this=this, 2794 stored=self._match_text_seq("STORED") and self._parse_stored(), 2795 by_name=self._match_text_seq("BY", "NAME"), 2796 exists=self._parse_exists(), 2797 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2798 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2799 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2800 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2801 conflict=self._parse_on_conflict(), 2802 returning=returning or self._parse_returning(), 2803 overwrite=overwrite, 2804 alternative=alternative, 2805 ignore=ignore, 2806 source=self._match(TokenType.TABLE) and self._parse_table(), 2807 ) 2808 2809 def _parse_kill(self) -> exp.Kill: 2810 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2811 2812 return self.expression( 2813 exp.Kill, 2814 this=self._parse_primary(), 2815 kind=kind, 2816 ) 2817 2818 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2819 conflict = self._match_text_seq("ON", "CONFLICT") 2820 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2821 2822 if not conflict and not duplicate: 2823 return None 2824 2825 conflict_keys = None 2826 constraint = None 2827 2828 if conflict: 2829 if self._match_text_seq("ON", "CONSTRAINT"): 2830 constraint = self._parse_id_var() 2831 elif self._match(TokenType.L_PAREN): 2832 conflict_keys = self._parse_csv(self._parse_id_var) 2833 self._match_r_paren() 2834 2835 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2836 if 
self._prev.token_type == TokenType.UPDATE: 2837 self._match(TokenType.SET) 2838 expressions = self._parse_csv(self._parse_equality) 2839 else: 2840 expressions = None 2841 2842 return self.expression( 2843 exp.OnConflict, 2844 duplicate=duplicate, 2845 expressions=expressions, 2846 action=action, 2847 conflict_keys=conflict_keys, 2848 constraint=constraint, 2849 where=self._parse_where(), 2850 ) 2851 2852 def _parse_returning(self) -> t.Optional[exp.Returning]: 2853 if not self._match(TokenType.RETURNING): 2854 return None 2855 return self.expression( 2856 exp.Returning, 2857 expressions=self._parse_csv(self._parse_expression), 2858 into=self._match(TokenType.INTO) and self._parse_table_part(), 2859 ) 2860 2861 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2862 if not self._match(TokenType.FORMAT): 2863 return None 2864 return self._parse_row_format() 2865 2866 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2867 index = self._index 2868 with_ = with_ or self._match_text_seq("WITH") 2869 2870 if not self._match(TokenType.SERDE_PROPERTIES): 2871 self._retreat(index) 2872 return None 2873 return self.expression( 2874 exp.SerdeProperties, 2875 **{ # type: ignore 2876 "expressions": self._parse_wrapped_properties(), 2877 "with": with_, 2878 }, 2879 ) 2880 2881 def _parse_row_format( 2882 self, match_row: bool = False 2883 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2884 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2885 return None 2886 2887 if self._match_text_seq("SERDE"): 2888 this = self._parse_string() 2889 2890 serde_properties = self._parse_serde_properties() 2891 2892 return self.expression( 2893 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2894 ) 2895 2896 self._match_text_seq("DELIMITED") 2897 2898 kwargs = {} 2899 2900 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2901 kwargs["fields"] = self._parse_string() 2902 if self._match_text_seq("ESCAPED", "BY"): 2903 kwargs["escaped"] = self._parse_string() 2904 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2905 kwargs["collection_items"] = self._parse_string() 2906 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2907 kwargs["map_keys"] = self._parse_string() 2908 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2909 kwargs["lines"] = self._parse_string() 2910 if self._match_text_seq("NULL", "DEFINED", "AS"): 2911 kwargs["null"] = self._parse_string() 2912 2913 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2914 2915 def _parse_load(self) -> exp.LoadData | exp.Command: 2916 if self._match_text_seq("DATA"): 2917 local = self._match_text_seq("LOCAL") 2918 self._match_text_seq("INPATH") 2919 inpath = self._parse_string() 2920 overwrite = self._match(TokenType.OVERWRITE) 2921 self._match_pair(TokenType.INTO, TokenType.TABLE) 2922 2923 return self.expression( 2924 exp.LoadData, 2925 this=self._parse_table(schema=True), 2926 local=local, 2927 overwrite=overwrite, 2928 inpath=inpath, 2929 partition=self._parse_partition(), 2930 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2931 serde=self._match_text_seq("SERDE") and self._parse_string(), 2932 ) 2933 return self._parse_as_command(self._prev) 2934 2935 def _parse_delete(self) -> exp.Delete: 2936 # This handles MySQL's "Multiple-Table Syntax" 2937 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2938 tables 
= None 2939 if not self._match(TokenType.FROM, advance=False): 2940 tables = self._parse_csv(self._parse_table) or None 2941 2942 returning = self._parse_returning() 2943 2944 return self.expression( 2945 exp.Delete, 2946 tables=tables, 2947 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2948 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2949 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2950 where=self._parse_where(), 2951 returning=returning or self._parse_returning(), 2952 limit=self._parse_limit(), 2953 ) 2954 2955 def _parse_update(self) -> exp.Update: 2956 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2957 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2958 returning = self._parse_returning() 2959 return self.expression( 2960 exp.Update, 2961 **{ # type: ignore 2962 "this": this, 2963 "expressions": expressions, 2964 "from": self._parse_from(joins=True), 2965 "where": self._parse_where(), 2966 "returning": returning or self._parse_returning(), 2967 "order": self._parse_order(), 2968 "limit": self._parse_limit(), 2969 }, 2970 ) 2971 2972 def _parse_uncache(self) -> exp.Uncache: 2973 if not self._match(TokenType.TABLE): 2974 self.raise_error("Expecting TABLE after UNCACHE") 2975 2976 return self.expression( 2977 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2978 ) 2979 2980 def _parse_cache(self) -> exp.Cache: 2981 lazy = self._match_text_seq("LAZY") 2982 self._match(TokenType.TABLE) 2983 table = self._parse_table(schema=True) 2984 2985 options = [] 2986 if self._match_text_seq("OPTIONS"): 2987 self._match_l_paren() 2988 k = self._parse_string() 2989 self._match(TokenType.EQ) 2990 v = self._parse_string() 2991 options = [k, v] 2992 self._match_r_paren() 2993 2994 self._match(TokenType.ALIAS) 2995 return self.expression( 2996 exp.Cache, 2997 this=table, 2998 lazy=lazy, 2999 options=options, 3000 expression=self._parse_select(nested=True), 3001 ) 3002 3003 def _parse_partition(self) -> t.Optional[exp.Partition]: 3004 if not self._match_texts(self.PARTITION_KEYWORDS): 3005 return None 3006 3007 return self.expression( 3008 exp.Partition, 3009 subpartition=self._prev.text.upper() == "SUBPARTITION", 3010 expressions=self._parse_wrapped_csv(self._parse_assignment), 3011 ) 3012 3013 def _parse_value(self) -> t.Optional[exp.Tuple]: 3014 def _parse_value_expression() -> t.Optional[exp.Expression]: 3015 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3016 return exp.var(self._prev.text.upper()) 3017 return self._parse_expression() 3018 3019 if self._match(TokenType.L_PAREN): 3020 expressions = self._parse_csv(_parse_value_expression) 3021 self._match_r_paren() 3022 return self.expression(exp.Tuple, expressions=expressions) 3023 3024 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
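# e.g. each bare expression is wrapped below in its own single-element Tuple (one row),
# so VALUES 1, 2 becomes roughly Values(expressions=[Tuple(expressions=[1]), Tuple(expressions=[2])])
# once _parse_derived_table_values collects the comma-separated rows.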
3025 expression = self._parse_expression() 3026 if expression: 3027 return self.expression(exp.Tuple, expressions=[expression]) 3028 return None 3029 3030 def _parse_projections(self) -> t.List[exp.Expression]: 3031 return self._parse_expressions() 3032 3033 def _parse_select( 3034 self, 3035 nested: bool = False, 3036 table: bool = False, 3037 parse_subquery_alias: bool = True, 3038 parse_set_operation: bool = True, 3039 ) -> t.Optional[exp.Expression]: 3040 cte = self._parse_with() 3041 3042 if cte: 3043 this = self._parse_statement() 3044 3045 if not this: 3046 self.raise_error("Failed to parse any statement following CTE") 3047 return cte 3048 3049 if "with" in this.arg_types: 3050 this.set("with", cte) 3051 else: 3052 self.raise_error(f"{this.key} does not support CTE") 3053 this = cte 3054 3055 return this 3056 3057 # duckdb supports leading with FROM x 3058 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 3059 3060 if self._match(TokenType.SELECT): 3061 comments = self._prev_comments 3062 3063 hint = self._parse_hint() 3064 3065 if self._next and not self._next.token_type == TokenType.DOT: 3066 all_ = self._match(TokenType.ALL) 3067 distinct = self._match_set(self.DISTINCT_TOKENS) 3068 else: 3069 all_, distinct = None, None 3070 3071 kind = ( 3072 self._match(TokenType.ALIAS) 3073 and self._match_texts(("STRUCT", "VALUE")) 3074 and self._prev.text.upper() 3075 ) 3076 3077 if distinct: 3078 distinct = self.expression( 3079 exp.Distinct, 3080 on=self._parse_value() if self._match(TokenType.ON) else None, 3081 ) 3082 3083 if all_ and distinct: 3084 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3085 3086 operation_modifiers = [] 3087 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3088 operation_modifiers.append(exp.var(self._prev.text.upper())) 3089 3090 limit = self._parse_limit(top=True) 3091 projections = self._parse_projections() 3092 3093 this = self.expression( 3094 exp.Select, 3095 kind=kind, 3096 hint=hint, 3097 distinct=distinct, 3098 expressions=projections, 3099 limit=limit, 3100 operation_modifiers=operation_modifiers or None, 3101 ) 3102 this.comments = comments 3103 3104 into = self._parse_into() 3105 if into: 3106 this.set("into", into) 3107 3108 if not from_: 3109 from_ = self._parse_from() 3110 3111 if from_: 3112 this.set("from", from_) 3113 3114 this = self._parse_query_modifiers(this) 3115 elif (table or nested) and self._match(TokenType.L_PAREN): 3116 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3117 this = self._parse_simplified_pivot( 3118 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3119 ) 3120 elif self._match(TokenType.FROM): 3121 from_ = self._parse_from(skip_from_token=True) 3122 # Support parentheses for duckdb FROM-first syntax 3123 select = self._parse_select() 3124 if select: 3125 select.set("from", from_) 3126 this = select 3127 else: 3128 this = exp.select("*").from_(t.cast(exp.From, from_)) 3129 else: 3130 this = ( 3131 self._parse_table() 3132 if table 3133 else self._parse_select(nested=True, parse_set_operation=False) 3134 ) 3135 3136 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3137 # in case a modifier (e.g. 
join) is following 3138 if table and isinstance(this, exp.Values) and this.alias: 3139 alias = this.args["alias"].pop() 3140 this = exp.Table(this=this, alias=alias) 3141 3142 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3143 3144 self._match_r_paren() 3145 3146 # We return early here so that the UNION isn't attached to the subquery by the 3147 # following call to _parse_set_operations, but instead becomes the parent node 3148 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3149 elif self._match(TokenType.VALUES, advance=False): 3150 this = self._parse_derived_table_values() 3151 elif from_: 3152 this = exp.select("*").from_(from_.this, copy=False) 3153 elif self._match(TokenType.SUMMARIZE): 3154 table = self._match(TokenType.TABLE) 3155 this = self._parse_select() or self._parse_string() or self._parse_table() 3156 return self.expression(exp.Summarize, this=this, table=table) 3157 elif self._match(TokenType.DESCRIBE): 3158 this = self._parse_describe() 3159 elif self._match_text_seq("STREAM"): 3160 this = self._parse_function() 3161 if this: 3162 this = self.expression(exp.Stream, this=this) 3163 else: 3164 self._retreat(self._index - 1) 3165 else: 3166 this = None 3167 3168 return self._parse_set_operations(this) if parse_set_operation else this 3169 3170 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3171 if not skip_with_token and not self._match(TokenType.WITH): 3172 return None 3173 3174 comments = self._prev_comments 3175 recursive = self._match(TokenType.RECURSIVE) 3176 3177 last_comments = None 3178 expressions = [] 3179 while True: 3180 expressions.append(self._parse_cte()) 3181 if last_comments: 3182 expressions[-1].add_comments(last_comments) 3183 3184 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3185 break 3186 else: 3187 self._match(TokenType.WITH) 3188 3189 last_comments = self._prev_comments 3190 3191 return self.expression( 3192 exp.With, comments=comments, expressions=expressions, recursive=recursive 3193 ) 3194 3195 def _parse_cte(self) -> t.Optional[exp.CTE]: 3196 index = self._index 3197 3198 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3199 if not alias or not alias.this: 3200 self.raise_error("Expected CTE to have alias") 3201 3202 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3203 self._retreat(index) 3204 return None 3205 3206 comments = self._prev_comments 3207 3208 if self._match_text_seq("NOT", "MATERIALIZED"): 3209 materialized = False 3210 elif self._match_text_seq("MATERIALIZED"): 3211 materialized = True 3212 else: 3213 materialized = None 3214 3215 cte = self.expression( 3216 exp.CTE, 3217 this=self._parse_wrapped(self._parse_statement), 3218 alias=alias, 3219 materialized=materialized, 3220 comments=comments, 3221 ) 3222 3223 if isinstance(cte.this, exp.Values): 3224 cte.set("this", exp.select("*").from_(exp.alias_(cte.this, "_values", table=True))) 3225 3226 return cte 3227 3228 def _parse_table_alias( 3229 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3230 ) -> t.Optional[exp.TableAlias]: 3231 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3232 # so this section tries to parse the clause version and if it fails, it treats the token 3233 # as an identifier (alias) 3234 if self._can_parse_limit_or_offset(): 3235 return None 3236 3237 any_token = self._match(TokenType.ALIAS) 3238 alias = ( 3239 self._parse_id_var(any_token=any_token, tokens=alias_tokens or 
self.TABLE_ALIAS_TOKENS) 3240 or self._parse_string_as_identifier() 3241 ) 3242 3243 index = self._index 3244 if self._match(TokenType.L_PAREN): 3245 columns = self._parse_csv(self._parse_function_parameter) 3246 self._match_r_paren() if columns else self._retreat(index) 3247 else: 3248 columns = None 3249 3250 if not alias and not columns: 3251 return None 3252 3253 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3254 3255 # We bubble up comments from the Identifier to the TableAlias 3256 if isinstance(alias, exp.Identifier): 3257 table_alias.add_comments(alias.pop_comments()) 3258 3259 return table_alias 3260 3261 def _parse_subquery( 3262 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3263 ) -> t.Optional[exp.Subquery]: 3264 if not this: 3265 return None 3266 3267 return self.expression( 3268 exp.Subquery, 3269 this=this, 3270 pivots=self._parse_pivots(), 3271 alias=self._parse_table_alias() if parse_alias else None, 3272 sample=self._parse_table_sample(), 3273 ) 3274 3275 def _implicit_unnests_to_explicit(self, this: E) -> E: 3276 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3277 3278 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3279 for i, join in enumerate(this.args.get("joins") or []): 3280 table = join.this 3281 normalized_table = table.copy() 3282 normalized_table.meta["maybe_column"] = True 3283 normalized_table = _norm(normalized_table, dialect=self.dialect) 3284 3285 if isinstance(table, exp.Table) and not join.args.get("on"): 3286 if normalized_table.parts[0].name in refs: 3287 table_as_column = table.to_column() 3288 unnest = exp.Unnest(expressions=[table_as_column]) 3289 3290 # Table.to_column creates a parent Alias node that we want to convert to 3291 # a TableAlias and attach to the Unnest, so it matches the parser's output 3292 if isinstance(table.args.get("alias"), exp.TableAlias): 3293 table_as_column.replace(table_as_column.this) 3294 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3295 3296 table.replace(unnest) 3297 3298 refs.add(normalized_table.alias_or_name) 3299 3300 return this 3301 3302 def _parse_query_modifiers( 3303 self, this: t.Optional[exp.Expression] 3304 ) -> t.Optional[exp.Expression]: 3305 if isinstance(this, (exp.Query, exp.Table)): 3306 for join in self._parse_joins(): 3307 this.append("joins", join) 3308 for lateral in iter(self._parse_lateral, None): 3309 this.append("laterals", lateral) 3310 3311 while True: 3312 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3313 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3314 key, expression = parser(self) 3315 3316 if expression: 3317 this.set(key, expression) 3318 if key == "limit": 3319 offset = expression.args.pop("offset", None) 3320 3321 if offset: 3322 offset = exp.Offset(expression=offset) 3323 this.set("offset", offset) 3324 3325 limit_by_expressions = expression.expressions 3326 expression.set("expressions", None) 3327 offset.set("expressions", limit_by_expressions) 3328 continue 3329 break 3330 3331 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3332 this = self._implicit_unnests_to_explicit(this) 3333 3334 return this 3335 3336 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3337 start = self._curr 3338 while self._curr: 3339 self._advance() 3340 3341 end = self._tokens[self._index - 1] 3342 return exp.Hint(expressions=[self._find_sql(start, end)]) 3343 3344 def 
_parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3345 return self._parse_function_call() 3346 3347 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3348 start_index = self._index 3349 should_fallback_to_string = False 3350 3351 hints = [] 3352 try: 3353 for hint in iter( 3354 lambda: self._parse_csv( 3355 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3356 ), 3357 [], 3358 ): 3359 hints.extend(hint) 3360 except ParseError: 3361 should_fallback_to_string = True 3362 3363 if should_fallback_to_string or self._curr: 3364 self._retreat(start_index) 3365 return self._parse_hint_fallback_to_string() 3366 3367 return self.expression(exp.Hint, expressions=hints) 3368 3369 def _parse_hint(self) -> t.Optional[exp.Hint]: 3370 if self._match(TokenType.HINT) and self._prev_comments: 3371 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3372 3373 return None 3374 3375 def _parse_into(self) -> t.Optional[exp.Into]: 3376 if not self._match(TokenType.INTO): 3377 return None 3378 3379 temp = self._match(TokenType.TEMPORARY) 3380 unlogged = self._match_text_seq("UNLOGGED") 3381 self._match(TokenType.TABLE) 3382 3383 return self.expression( 3384 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3385 ) 3386 3387 def _parse_from( 3388 self, joins: bool = False, skip_from_token: bool = False 3389 ) -> t.Optional[exp.From]: 3390 if not skip_from_token and not self._match(TokenType.FROM): 3391 return None 3392 3393 return self.expression( 3394 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3395 ) 3396 3397 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3398 return self.expression( 3399 exp.MatchRecognizeMeasure, 3400 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3401 this=self._parse_expression(), 3402 ) 3403 3404 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3405 if not self._match(TokenType.MATCH_RECOGNIZE): 3406 return None 3407 3408 self._match_l_paren() 3409 3410 partition = self._parse_partition_by() 3411 order = self._parse_order() 3412 3413 measures = ( 3414 self._parse_csv(self._parse_match_recognize_measure) 3415 if self._match_text_seq("MEASURES") 3416 else None 3417 ) 3418 3419 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3420 rows = exp.var("ONE ROW PER MATCH") 3421 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3422 text = "ALL ROWS PER MATCH" 3423 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3424 text += " SHOW EMPTY MATCHES" 3425 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3426 text += " OMIT EMPTY MATCHES" 3427 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3428 text += " WITH UNMATCHED ROWS" 3429 rows = exp.var(text) 3430 else: 3431 rows = None 3432 3433 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3434 text = "AFTER MATCH SKIP" 3435 if self._match_text_seq("PAST", "LAST", "ROW"): 3436 text += " PAST LAST ROW" 3437 elif self._match_text_seq("TO", "NEXT", "ROW"): 3438 text += " TO NEXT ROW" 3439 elif self._match_text_seq("TO", "FIRST"): 3440 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3441 elif self._match_text_seq("TO", "LAST"): 3442 text += f" TO LAST {self._advance_any().text}" # type: ignore 3443 after = exp.var(text) 3444 else: 3445 after = None 3446 3447 if self._match_text_seq("PATTERN"): 3448 self._match_l_paren() 3449 3450 if not self._curr: 3451 self.raise_error("Expecting )", self._curr) 3452 3453 
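# The scan below captures the PATTERN body verbatim from the raw token stream,
# tracking parenthesis depth so that nested groups, e.g. PATTERN ((A B)+ C),
# are consumed whole and stored as a single opaque exp.var.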
paren = 1 3454 start = self._curr 3455 3456 while self._curr and paren > 0: 3457 if self._curr.token_type == TokenType.L_PAREN: 3458 paren += 1 3459 if self._curr.token_type == TokenType.R_PAREN: 3460 paren -= 1 3461 3462 end = self._prev 3463 self._advance() 3464 3465 if paren > 0: 3466 self.raise_error("Expecting )", self._curr) 3467 3468 pattern = exp.var(self._find_sql(start, end)) 3469 else: 3470 pattern = None 3471 3472 define = ( 3473 self._parse_csv(self._parse_name_as_expression) 3474 if self._match_text_seq("DEFINE") 3475 else None 3476 ) 3477 3478 self._match_r_paren() 3479 3480 return self.expression( 3481 exp.MatchRecognize, 3482 partition_by=partition, 3483 order=order, 3484 measures=measures, 3485 rows=rows, 3486 after=after, 3487 pattern=pattern, 3488 define=define, 3489 alias=self._parse_table_alias(), 3490 ) 3491 3492 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3493 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3494 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3495 cross_apply = False 3496 3497 if cross_apply is not None: 3498 this = self._parse_select(table=True) 3499 view = None 3500 outer = None 3501 elif self._match(TokenType.LATERAL): 3502 this = self._parse_select(table=True) 3503 view = self._match(TokenType.VIEW) 3504 outer = self._match(TokenType.OUTER) 3505 else: 3506 return None 3507 3508 if not this: 3509 this = ( 3510 self._parse_unnest() 3511 or self._parse_function() 3512 or self._parse_id_var(any_token=False) 3513 ) 3514 3515 while self._match(TokenType.DOT): 3516 this = exp.Dot( 3517 this=this, 3518 expression=self._parse_function() or self._parse_id_var(any_token=False), 3519 ) 3520 3521 if view: 3522 table = self._parse_id_var(any_token=False) 3523 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3524 table_alias: t.Optional[exp.TableAlias] = self.expression( 3525 exp.TableAlias, this=table, columns=columns 3526 ) 3527 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3528 # We move the alias from the lateral's child node to the lateral itself 3529 table_alias = this.args["alias"].pop() 3530 else: 3531 table_alias = self._parse_table_alias() 3532 3533 return self.expression( 3534 exp.Lateral, 3535 this=this, 3536 view=view, 3537 outer=outer, 3538 alias=table_alias, 3539 cross_apply=cross_apply, 3540 ) 3541 3542 def _parse_join_parts( 3543 self, 3544 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3545 return ( 3546 self._match_set(self.JOIN_METHODS) and self._prev, 3547 self._match_set(self.JOIN_SIDES) and self._prev, 3548 self._match_set(self.JOIN_KINDS) and self._prev, 3549 ) 3550 3551 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3552 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3553 this = self._parse_column() 3554 if isinstance(this, exp.Column): 3555 return this.this 3556 return this 3557 3558 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3559 3560 def _parse_join( 3561 self, skip_join_token: bool = False, parse_bracket: bool = False 3562 ) -> t.Optional[exp.Join]: 3563 if self._match(TokenType.COMMA): 3564 return self.expression(exp.Join, this=self._parse_table()) 3565 3566 index = self._index 3567 method, side, kind = self._parse_join_parts() 3568 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3569 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3570 3571 if not skip_join_token and 
not join: 3572 self._retreat(index) 3573 kind = None 3574 method = None 3575 side = None 3576 3577 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3578 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3579 3580 if not skip_join_token and not join and not outer_apply and not cross_apply: 3581 return None 3582 3583 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3584 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3585 kwargs["expressions"] = self._parse_csv( 3586 lambda: self._parse_table(parse_bracket=parse_bracket) 3587 ) 3588 3589 if method: 3590 kwargs["method"] = method.text 3591 if side: 3592 kwargs["side"] = side.text 3593 if kind: 3594 kwargs["kind"] = kind.text 3595 if hint: 3596 kwargs["hint"] = hint 3597 3598 if self._match(TokenType.MATCH_CONDITION): 3599 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3600 3601 if self._match(TokenType.ON): 3602 kwargs["on"] = self._parse_assignment() 3603 elif self._match(TokenType.USING): 3604 kwargs["using"] = self._parse_using_identifiers() 3605 elif ( 3606 not (outer_apply or cross_apply) 3607 and not isinstance(kwargs["this"], exp.Unnest) 3608 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3609 ): 3610 index = self._index 3611 joins: t.Optional[list] = list(self._parse_joins()) 3612 3613 if joins and self._match(TokenType.ON): 3614 kwargs["on"] = self._parse_assignment() 3615 elif joins and self._match(TokenType.USING): 3616 kwargs["using"] = self._parse_using_identifiers() 3617 else: 3618 joins = None 3619 self._retreat(index) 3620 3621 kwargs["this"].set("joins", joins if joins else None) 3622 3623 comments = [c for token in (method, side, kind) if token for c in token.comments] 3624 return self.expression(exp.Join, comments=comments, **kwargs) 3625 3626 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3627 this = self._parse_assignment() 3628 3629 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3630 return this 3631 3632 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3633 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3634 3635 return this 3636 3637 def _parse_index_params(self) -> exp.IndexParameters: 3638 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3639 3640 if self._match(TokenType.L_PAREN, advance=False): 3641 columns = self._parse_wrapped_csv(self._parse_with_operator) 3642 else: 3643 columns = None 3644 3645 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3646 partition_by = self._parse_partition_by() 3647 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3648 tablespace = ( 3649 self._parse_var(any_token=True) 3650 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3651 else None 3652 ) 3653 where = self._parse_where() 3654 3655 on = self._parse_field() if self._match(TokenType.ON) else None 3656 3657 return self.expression( 3658 exp.IndexParameters, 3659 using=using, 3660 columns=columns, 3661 include=include, 3662 partition_by=partition_by, 3663 where=where, 3664 with_storage=with_storage, 3665 tablespace=tablespace, 3666 on=on, 3667 ) 3668 3669 def _parse_index( 3670 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3671 ) -> t.Optional[exp.Index]: 3672 if index or anonymous: 3673 unique = None 3674 primary = None 3675 amp = None 3676 3677 
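# An index name was already parsed by the caller (or the index is anonymous), so only
# the optional ON <table> target remains to be consumed here; the else branch below
# instead parses the UNIQUE/PRIMARY/AMP ... INDEX <name> prefix form.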
self._match(TokenType.ON) 3678 self._match(TokenType.TABLE) # hive 3679 table = self._parse_table_parts(schema=True) 3680 else: 3681 unique = self._match(TokenType.UNIQUE) 3682 primary = self._match_text_seq("PRIMARY") 3683 amp = self._match_text_seq("AMP") 3684 3685 if not self._match(TokenType.INDEX): 3686 return None 3687 3688 index = self._parse_id_var() 3689 table = None 3690 3691 params = self._parse_index_params() 3692 3693 return self.expression( 3694 exp.Index, 3695 this=index, 3696 table=table, 3697 unique=unique, 3698 primary=primary, 3699 amp=amp, 3700 params=params, 3701 ) 3702 3703 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3704 hints: t.List[exp.Expression] = [] 3705 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3706 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3707 hints.append( 3708 self.expression( 3709 exp.WithTableHint, 3710 expressions=self._parse_csv( 3711 lambda: self._parse_function() or self._parse_var(any_token=True) 3712 ), 3713 ) 3714 ) 3715 self._match_r_paren() 3716 else: 3717 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3718 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3719 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3720 3721 self._match_set((TokenType.INDEX, TokenType.KEY)) 3722 if self._match(TokenType.FOR): 3723 hint.set("target", self._advance_any() and self._prev.text.upper()) 3724 3725 hint.set("expressions", self._parse_wrapped_id_vars()) 3726 hints.append(hint) 3727 3728 return hints or None 3729 3730 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3731 return ( 3732 (not schema and self._parse_function(optional_parens=False)) 3733 or self._parse_id_var(any_token=False) 3734 or self._parse_string_as_identifier() 3735 or self._parse_placeholder() 3736 ) 3737 3738 def _parse_table_parts( 3739 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3740 ) -> exp.Table: 3741 catalog = None 3742 db = None 3743 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3744 3745 while self._match(TokenType.DOT): 3746 if catalog: 3747 # This allows nesting the table in arbitrarily many dot expressions if needed 3748 table = self.expression( 3749 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3750 ) 3751 else: 3752 catalog = db 3753 db = table 3754 # "" used for tsql FROM a..b case 3755 table = self._parse_table_part(schema=schema) or "" 3756 3757 if ( 3758 wildcard 3759 and self._is_connected() 3760 and (isinstance(table, exp.Identifier) or not table) 3761 and self._match(TokenType.STAR) 3762 ): 3763 if isinstance(table, exp.Identifier): 3764 table.args["this"] += "*" 3765 else: 3766 table = exp.Identifier(this="*") 3767 3768 # We bubble up comments from the Identifier to the Table 3769 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3770 3771 if is_db_reference: 3772 catalog = db 3773 db = table 3774 table = None 3775 3776 if not table and not is_db_reference: 3777 self.raise_error(f"Expected table name but got {self._curr}") 3778 if not db and is_db_reference: 3779 self.raise_error(f"Expected database name but got {self._curr}") 3780 3781 table = self.expression( 3782 exp.Table, 3783 comments=comments, 3784 this=table, 3785 db=db, 3786 catalog=catalog, 3787 ) 3788 3789 changes = self._parse_changes() 3790 if changes: 3791 table.set("changes", changes) 3792 3793 at_before = 
self._parse_historical_data() 3794 if at_before: 3795 table.set("when", at_before) 3796 3797 pivots = self._parse_pivots() 3798 if pivots: 3799 table.set("pivots", pivots) 3800 3801 return table 3802 3803 def _parse_table( 3804 self, 3805 schema: bool = False, 3806 joins: bool = False, 3807 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3808 parse_bracket: bool = False, 3809 is_db_reference: bool = False, 3810 parse_partition: bool = False, 3811 ) -> t.Optional[exp.Expression]: 3812 lateral = self._parse_lateral() 3813 if lateral: 3814 return lateral 3815 3816 unnest = self._parse_unnest() 3817 if unnest: 3818 return unnest 3819 3820 values = self._parse_derived_table_values() 3821 if values: 3822 return values 3823 3824 subquery = self._parse_select(table=True) 3825 if subquery: 3826 if not subquery.args.get("pivots"): 3827 subquery.set("pivots", self._parse_pivots()) 3828 return subquery 3829 3830 bracket = parse_bracket and self._parse_bracket(None) 3831 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3832 3833 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3834 self._parse_table 3835 ) 3836 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3837 3838 only = self._match(TokenType.ONLY) 3839 3840 this = t.cast( 3841 exp.Expression, 3842 bracket 3843 or rows_from 3844 or self._parse_bracket( 3845 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3846 ), 3847 ) 3848 3849 if only: 3850 this.set("only", only) 3851 3852 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3853 self._match_text_seq("*") 3854 3855 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3856 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3857 this.set("partition", self._parse_partition()) 3858 3859 if schema: 3860 return self._parse_schema(this=this) 3861 3862 version = self._parse_version() 3863 3864 if version: 3865 this.set("version", version) 3866 3867 if self.dialect.ALIAS_POST_TABLESAMPLE: 3868 this.set("sample", self._parse_table_sample()) 3869 3870 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3871 if alias: 3872 this.set("alias", alias) 3873 3874 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3875 return self.expression( 3876 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3877 ) 3878 3879 this.set("hints", self._parse_table_hints()) 3880 3881 if not this.args.get("pivots"): 3882 this.set("pivots", self._parse_pivots()) 3883 3884 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3885 this.set("sample", self._parse_table_sample()) 3886 3887 if joins: 3888 for join in self._parse_joins(): 3889 this.append("joins", join) 3890 3891 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3892 this.set("ordinality", True) 3893 this.set("alias", self._parse_table_alias()) 3894 3895 return this 3896 3897 def _parse_version(self) -> t.Optional[exp.Version]: 3898 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3899 this = "TIMESTAMP" 3900 elif self._match(TokenType.VERSION_SNAPSHOT): 3901 this = "VERSION" 3902 else: 3903 return None 3904 3905 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3906 kind = self._prev.text.upper() 3907 start = self._parse_bitwise() 3908 self._match_texts(("TO", "AND")) 3909 end = self._parse_bitwise() 3910 expression: t.Optional[exp.Expression] = self.expression( 3911 exp.Tuple, expressions=[start, end] 
3912 ) 3913 elif self._match_text_seq("CONTAINED", "IN"): 3914 kind = "CONTAINED IN" 3915 expression = self.expression( 3916 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3917 ) 3918 elif self._match(TokenType.ALL): 3919 kind = "ALL" 3920 expression = None 3921 else: 3922 self._match_text_seq("AS", "OF") 3923 kind = "AS OF" 3924 expression = self._parse_type() 3925 3926 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3927 3928 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3929 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3930 index = self._index 3931 historical_data = None 3932 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3933 this = self._prev.text.upper() 3934 kind = ( 3935 self._match(TokenType.L_PAREN) 3936 and self._match_texts(self.HISTORICAL_DATA_KIND) 3937 and self._prev.text.upper() 3938 ) 3939 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3940 3941 if expression: 3942 self._match_r_paren() 3943 historical_data = self.expression( 3944 exp.HistoricalData, this=this, kind=kind, expression=expression 3945 ) 3946 else: 3947 self._retreat(index) 3948 3949 return historical_data 3950 3951 def _parse_changes(self) -> t.Optional[exp.Changes]: 3952 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3953 return None 3954 3955 information = self._parse_var(any_token=True) 3956 self._match_r_paren() 3957 3958 return self.expression( 3959 exp.Changes, 3960 information=information, 3961 at_before=self._parse_historical_data(), 3962 end=self._parse_historical_data(), 3963 ) 3964 3965 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3966 if not self._match(TokenType.UNNEST): 3967 return None 3968 3969 expressions = self._parse_wrapped_csv(self._parse_equality) 3970 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3971 3972 alias = self._parse_table_alias() if with_alias else None 3973 3974 if alias: 3975 if self.dialect.UNNEST_COLUMN_ONLY: 3976 if alias.args.get("columns"): 3977 self.raise_error("Unexpected extra column alias in unnest.") 3978 3979 alias.set("columns", [alias.this]) 3980 alias.set("this", None) 3981 3982 columns = alias.args.get("columns") or [] 3983 if offset and len(expressions) < len(columns): 3984 offset = columns.pop() 3985 3986 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3987 self._match(TokenType.ALIAS) 3988 offset = self._parse_id_var( 3989 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3990 ) or exp.to_identifier("offset") 3991 3992 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3993 3994 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3995 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3996 if not is_derived and not ( 3997 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3998 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3999 ): 4000 return None 4001 4002 expressions = self._parse_csv(self._parse_value) 4003 alias = self._parse_table_alias() 4004 4005 if is_derived: 4006 self._match_r_paren() 4007 4008 return self.expression( 4009 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4010 ) 4011 4012 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4013 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4014 as_modifier and self._match_text_seq("USING", "SAMPLE") 4015 ): 4016 
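# Neither TABLESAMPLE nor a trailing sample modifier (e.g. DuckDB's USING SAMPLE,
# which is only recognized when as_modifier is set) was found, so there is no
# sample clause to parse.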
return None 4017 4018 bucket_numerator = None 4019 bucket_denominator = None 4020 bucket_field = None 4021 percent = None 4022 size = None 4023 seed = None 4024 4025 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4026 matched_l_paren = self._match(TokenType.L_PAREN) 4027 4028 if self.TABLESAMPLE_CSV: 4029 num = None 4030 expressions = self._parse_csv(self._parse_primary) 4031 else: 4032 expressions = None 4033 num = ( 4034 self._parse_factor() 4035 if self._match(TokenType.NUMBER, advance=False) 4036 else self._parse_primary() or self._parse_placeholder() 4037 ) 4038 4039 if self._match_text_seq("BUCKET"): 4040 bucket_numerator = self._parse_number() 4041 self._match_text_seq("OUT", "OF") 4042 bucket_denominator = self._parse_number() 4043 self._match(TokenType.ON) 4044 bucket_field = self._parse_field() 4045 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4046 percent = num 4047 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4048 size = num 4049 else: 4050 percent = num 4051 4052 if matched_l_paren: 4053 self._match_r_paren() 4054 4055 if self._match(TokenType.L_PAREN): 4056 method = self._parse_var(upper=True) 4057 seed = self._match(TokenType.COMMA) and self._parse_number() 4058 self._match_r_paren() 4059 elif self._match_texts(("SEED", "REPEATABLE")): 4060 seed = self._parse_wrapped(self._parse_number) 4061 4062 if not method and self.DEFAULT_SAMPLING_METHOD: 4063 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4064 4065 return self.expression( 4066 exp.TableSample, 4067 expressions=expressions, 4068 method=method, 4069 bucket_numerator=bucket_numerator, 4070 bucket_denominator=bucket_denominator, 4071 bucket_field=bucket_field, 4072 percent=percent, 4073 size=size, 4074 seed=seed, 4075 ) 4076 4077 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4078 return list(iter(self._parse_pivot, None)) or None 4079 4080 def _parse_joins(self) -> t.Iterator[exp.Join]: 4081 return iter(self._parse_join, None) 4082 4083 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4084 if not self._match(TokenType.INTO): 4085 return None 4086 4087 return self.expression( 4088 exp.UnpivotColumns, 4089 this=self._match_text_seq("NAME") and self._parse_column(), 4090 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4091 ) 4092 4093 # https://duckdb.org/docs/sql/statements/pivot 4094 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4095 def _parse_on() -> t.Optional[exp.Expression]: 4096 this = self._parse_bitwise() 4097 4098 if self._match(TokenType.IN): 4099 # PIVOT ... ON col IN (row_val1, row_val2) 4100 return self._parse_in(this) 4101 if self._match(TokenType.ALIAS, advance=False): 4102 # UNPIVOT ...
ON (col1, col2, col3) AS row_val 4103 return self._parse_alias(this) 4104 4105 return this 4106 4107 this = self._parse_table() 4108 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4109 into = self._parse_unpivot_columns() 4110 using = self._match(TokenType.USING) and self._parse_csv( 4111 lambda: self._parse_alias(self._parse_function()) 4112 ) 4113 group = self._parse_group() 4114 4115 return self.expression( 4116 exp.Pivot, 4117 this=this, 4118 expressions=expressions, 4119 using=using, 4120 group=group, 4121 unpivot=is_unpivot, 4122 into=into, 4123 ) 4124 4125 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 4126 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4127 this = self._parse_select_or_expression() 4128 4129 self._match(TokenType.ALIAS) 4130 alias = self._parse_bitwise() 4131 if alias: 4132 if isinstance(alias, exp.Column) and not alias.db: 4133 alias = alias.this 4134 return self.expression(exp.PivotAlias, this=this, alias=alias) 4135 4136 return this 4137 4138 value = self._parse_column() 4139 4140 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4141 self.raise_error("Expecting IN (") 4142 4143 if self._match(TokenType.ANY): 4144 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4145 else: 4146 exprs = self._parse_csv(_parse_aliased_expression) 4147 4148 self._match_r_paren() 4149 return self.expression(exp.In, this=value, expressions=exprs) 4150 4151 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4152 index = self._index 4153 include_nulls = None 4154 4155 if self._match(TokenType.PIVOT): 4156 unpivot = False 4157 elif self._match(TokenType.UNPIVOT): 4158 unpivot = True 4159 4160 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4161 if self._match_text_seq("INCLUDE", "NULLS"): 4162 include_nulls = True 4163 elif self._match_text_seq("EXCLUDE", "NULLS"): 4164 include_nulls = False 4165 else: 4166 return None 4167 4168 expressions = [] 4169 4170 if not self._match(TokenType.L_PAREN): 4171 self._retreat(index) 4172 return None 4173 4174 if unpivot: 4175 expressions = self._parse_csv(self._parse_column) 4176 else: 4177 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 4178 4179 if not expressions: 4180 self.raise_error("Failed to parse PIVOT's aggregation list") 4181 4182 if not self._match(TokenType.FOR): 4183 self.raise_error("Expecting FOR") 4184 4185 field = self._parse_pivot_in() 4186 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4187 self._parse_bitwise 4188 ) 4189 4190 self._match_r_paren() 4191 4192 pivot = self.expression( 4193 exp.Pivot, 4194 expressions=expressions, 4195 field=field, 4196 unpivot=unpivot, 4197 include_nulls=include_nulls, 4198 default_on_null=default_on_null, 4199 ) 4200 4201 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4202 pivot.set("alias", self._parse_table_alias()) 4203 4204 if not unpivot: 4205 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4206 4207 columns: t.List[exp.Expression] = [] 4208 for fld in pivot.args["field"].expressions: 4209 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4210 for name in names: 4211 if self.PREFIXED_PIVOT_COLUMNS: 4212 name = f"{name}_{field_name}" if name else field_name 4213 else: 4214 name = f"{field_name}_{name}" if name else field_name 4215 4216 columns.append(exp.to_identifier(name)) 4217 4218 
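# e.g. (illustrative) PIVOT(SUM(x) AS s FOR y IN ('a', 'b')) produces output column
# names s_a / s_b when PREFIXED_PIVOT_COLUMNS is set and a_s / b_s otherwise, with
# IDENTIFY_PIVOT_STRINGS controlling whether 'a' contributes its SQL text or its name.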
pivot.set("columns", columns) 4219 4220 return pivot 4221 4222 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4223 return [agg.alias for agg in aggregations] 4224 4225 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4226 if not skip_where_token and not self._match(TokenType.PREWHERE): 4227 return None 4228 4229 return self.expression( 4230 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4231 ) 4232 4233 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4234 if not skip_where_token and not self._match(TokenType.WHERE): 4235 return None 4236 4237 return self.expression( 4238 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4239 ) 4240 4241 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4242 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4243 return None 4244 4245 elements: t.Dict[str, t.Any] = defaultdict(list) 4246 4247 if self._match(TokenType.ALL): 4248 elements["all"] = True 4249 elif self._match(TokenType.DISTINCT): 4250 elements["all"] = False 4251 4252 while True: 4253 index = self._index 4254 4255 elements["expressions"].extend( 4256 self._parse_csv( 4257 lambda: None 4258 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4259 else self._parse_assignment() 4260 ) 4261 ) 4262 4263 before_with_index = self._index 4264 with_prefix = self._match(TokenType.WITH) 4265 4266 if self._match(TokenType.ROLLUP): 4267 elements["rollup"].append( 4268 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4269 ) 4270 elif self._match(TokenType.CUBE): 4271 elements["cube"].append( 4272 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4273 ) 4274 elif self._match(TokenType.GROUPING_SETS): 4275 elements["grouping_sets"].append( 4276 self.expression( 4277 exp.GroupingSets, 4278 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4279 ) 4280 ) 4281 elif self._match_text_seq("TOTALS"): 4282 elements["totals"] = True # type: ignore 4283 4284 if before_with_index <= self._index <= before_with_index + 1: 4285 self._retreat(before_with_index) 4286 break 4287 4288 if index == self._index: 4289 break 4290 4291 return self.expression(exp.Group, **elements) # type: ignore 4292 4293 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4294 return self.expression( 4295 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4296 ) 4297 4298 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4299 if self._match(TokenType.L_PAREN): 4300 grouping_set = self._parse_csv(self._parse_column) 4301 self._match_r_paren() 4302 return self.expression(exp.Tuple, expressions=grouping_set) 4303 4304 return self._parse_column() 4305 4306 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4307 if not skip_having_token and not self._match(TokenType.HAVING): 4308 return None 4309 return self.expression(exp.Having, this=self._parse_assignment()) 4310 4311 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4312 if not self._match(TokenType.QUALIFY): 4313 return None 4314 return self.expression(exp.Qualify, this=self._parse_assignment()) 4315 4316 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4317 if skip_start_token: 4318 start = None 4319 elif self._match(TokenType.START_WITH): 4320 start = self._parse_assignment() 4321 else: 4322 
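# No START WITH was matched and the caller did not ask to skip it, so this is not
# a hierarchical (CONNECT BY) query; bail out without consuming any tokens.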
return None 4323 4324 self._match(TokenType.CONNECT_BY) 4325 nocycle = self._match_text_seq("NOCYCLE") 4326 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4327 exp.Prior, this=self._parse_bitwise() 4328 ) 4329 connect = self._parse_assignment() 4330 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4331 4332 if not start and self._match(TokenType.START_WITH): 4333 start = self._parse_assignment() 4334 4335 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4336 4337 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4338 this = self._parse_id_var(any_token=True) 4339 if self._match(TokenType.ALIAS): 4340 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4341 return this 4342 4343 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4344 if self._match_text_seq("INTERPOLATE"): 4345 return self._parse_wrapped_csv(self._parse_name_as_expression) 4346 return None 4347 4348 def _parse_order( 4349 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4350 ) -> t.Optional[exp.Expression]: 4351 siblings = None 4352 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4353 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4354 return this 4355 4356 siblings = True 4357 4358 return self.expression( 4359 exp.Order, 4360 this=this, 4361 expressions=self._parse_csv(self._parse_ordered), 4362 siblings=siblings, 4363 ) 4364 4365 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4366 if not self._match(token): 4367 return None 4368 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4369 4370 def _parse_ordered( 4371 self, parse_method: t.Optional[t.Callable] = None 4372 ) -> t.Optional[exp.Ordered]: 4373 this = parse_method() if parse_method else self._parse_assignment() 4374 if not this: 4375 return None 4376 4377 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4378 this = exp.var("ALL") 4379 4380 asc = self._match(TokenType.ASC) 4381 desc = self._match(TokenType.DESC) or (asc and False) 4382 4383 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4384 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4385 4386 nulls_first = is_nulls_first or False 4387 explicitly_null_ordered = is_nulls_first or is_nulls_last 4388 4389 if ( 4390 not explicitly_null_ordered 4391 and ( 4392 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4393 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4394 ) 4395 and self.dialect.NULL_ORDERING != "nulls_are_last" 4396 ): 4397 nulls_first = True 4398 4399 if self._match_text_seq("WITH", "FILL"): 4400 with_fill = self.expression( 4401 exp.WithFill, 4402 **{ # type: ignore 4403 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4404 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4405 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4406 "interpolate": self._parse_interpolate(), 4407 }, 4408 ) 4409 else: 4410 with_fill = None 4411 4412 return self.expression( 4413 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4414 ) 4415 4416 def _parse_limit( 4417 self, 4418 this: t.Optional[exp.Expression] = None, 4419 top: bool = False, 4420 skip_limit_token: bool = False, 4421 ) -> t.Optional[exp.Expression]: 4422 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4423 comments = self._prev_comments 4424 if top: 4425 limit_paren = 
self._match(TokenType.L_PAREN) 4426 expression = self._parse_term() if limit_paren else self._parse_number() 4427 4428 if limit_paren: 4429 self._match_r_paren() 4430 else: 4431 expression = self._parse_term() 4432 4433 if self._match(TokenType.COMMA): 4434 offset = expression 4435 expression = self._parse_term() 4436 else: 4437 offset = None 4438 4439 limit_exp = self.expression( 4440 exp.Limit, 4441 this=this, 4442 expression=expression, 4443 offset=offset, 4444 comments=comments, 4445 expressions=self._parse_limit_by(), 4446 ) 4447 4448 return limit_exp 4449 4450 if self._match(TokenType.FETCH): 4451 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4452 direction = self._prev.text.upper() if direction else "FIRST" 4453 4454 count = self._parse_field(tokens=self.FETCH_TOKENS) 4455 percent = self._match(TokenType.PERCENT) 4456 4457 self._match_set((TokenType.ROW, TokenType.ROWS)) 4458 4459 only = self._match_text_seq("ONLY") 4460 with_ties = self._match_text_seq("WITH", "TIES") 4461 4462 if only and with_ties: 4463 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4464 4465 return self.expression( 4466 exp.Fetch, 4467 direction=direction, 4468 count=count, 4469 percent=percent, 4470 with_ties=with_ties, 4471 ) 4472 4473 return this 4474 4475 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4476 if not self._match(TokenType.OFFSET): 4477 return this 4478 4479 count = self._parse_term() 4480 self._match_set((TokenType.ROW, TokenType.ROWS)) 4481 4482 return self.expression( 4483 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4484 ) 4485 4486 def _can_parse_limit_or_offset(self) -> bool: 4487 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4488 return False 4489 4490 index = self._index 4491 result = bool( 4492 self._try_parse(self._parse_limit, retreat=True) 4493 or self._try_parse(self._parse_offset, retreat=True) 4494 ) 4495 self._retreat(index) 4496 return result 4497 4498 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4499 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4500 4501 def _parse_locks(self) -> t.List[exp.Lock]: 4502 locks = [] 4503 while True: 4504 if self._match_text_seq("FOR", "UPDATE"): 4505 update = True 4506 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4507 "LOCK", "IN", "SHARE", "MODE" 4508 ): 4509 update = False 4510 else: 4511 break 4512 4513 expressions = None 4514 if self._match_text_seq("OF"): 4515 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4516 4517 wait: t.Optional[bool | exp.Expression] = None 4518 if self._match_text_seq("NOWAIT"): 4519 wait = True 4520 elif self._match_text_seq("WAIT"): 4521 wait = self._parse_primary() 4522 elif self._match_text_seq("SKIP", "LOCKED"): 4523 wait = False 4524 4525 locks.append( 4526 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4527 ) 4528 4529 return locks 4530 4531 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4532 while this and self._match_set(self.SET_OPERATIONS): 4533 token_type = self._prev.token_type 4534 4535 if token_type == TokenType.UNION: 4536 operation: t.Type[exp.SetOperation] = exp.Union 4537 elif token_type == TokenType.EXCEPT: 4538 operation = exp.Except 4539 else: 4540 operation = exp.Intersect 4541 4542 comments = self._prev.comments 4543 4544 if self._match(TokenType.DISTINCT): 4545 distinct: 
t.Optional[bool] = True 4546 elif self._match(TokenType.ALL): 4547 distinct = False 4548 else: 4549 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4550 if distinct is None: 4551 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4552 4553 by_name = self._match_text_seq("BY", "NAME") 4554 expression = self._parse_select(nested=True, parse_set_operation=False) 4555 4556 this = self.expression( 4557 operation, 4558 comments=comments, 4559 this=this, 4560 distinct=distinct, 4561 by_name=by_name, 4562 expression=expression, 4563 ) 4564 4565 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4566 expression = this.expression 4567 4568 if expression: 4569 for arg in self.SET_OP_MODIFIERS: 4570 expr = expression.args.get(arg) 4571 if expr: 4572 this.set(arg, expr.pop()) 4573 4574 return this 4575 4576 def _parse_expression(self) -> t.Optional[exp.Expression]: 4577 return self._parse_alias(self._parse_assignment()) 4578 4579 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4580 this = self._parse_disjunction() 4581 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4582 # This allows us to parse <non-identifier token> := <expr> 4583 this = exp.column( 4584 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4585 ) 4586 4587 while self._match_set(self.ASSIGNMENT): 4588 if isinstance(this, exp.Column) and len(this.parts) == 1: 4589 this = this.this 4590 4591 this = self.expression( 4592 self.ASSIGNMENT[self._prev.token_type], 4593 this=this, 4594 comments=self._prev_comments, 4595 expression=self._parse_assignment(), 4596 ) 4597 4598 return this 4599 4600 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4601 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4602 4603 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4604 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4605 4606 def _parse_equality(self) -> t.Optional[exp.Expression]: 4607 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4608 4609 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4610 return self._parse_tokens(self._parse_range, self.COMPARISON) 4611 4612 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4613 this = this or self._parse_bitwise() 4614 negate = self._match(TokenType.NOT) 4615 4616 if self._match_set(self.RANGE_PARSERS): 4617 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4618 if not expression: 4619 return this 4620 4621 this = expression 4622 elif self._match(TokenType.ISNULL): 4623 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4624 4625 # Postgres supports ISNULL and NOTNULL for conditions. 
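# --- Added illustration (not part of the original source) --------------------
# Two behaviours shown above in one sketch: _parse_set_operations consults
# SET_OP_DISTINCT_BY_DEFAULT when neither DISTINCT nor ALL is written, and the
# _parse_assignment -> disjunction -> conjunction -> equality -> comparison ->
# range chain is a conventional precedence climb. BigQuery is an assumed
# example of a dialect that must spell the set-op qualifier out:
#
#   import sqlglot
#
#   sqlglot.transpile("SELECT 1 UNION SELECT 2", write="bigquery")
#   # -> ['SELECT 1 UNION DISTINCT SELECT 2']
#
#   repr(sqlglot.parse_one("SELECT a OR b AND NOT c = 1"))
#   # OR binds loosest, AND next, comparisons tighter, per the chain above
# ------------------------------------------------------------------------------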
4626 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4627 if self._match(TokenType.NOTNULL): 4628 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4629 this = self.expression(exp.Not, this=this) 4630 4631 if negate: 4632 this = self._negate_range(this) 4633 4634 if self._match(TokenType.IS): 4635 this = self._parse_is(this) 4636 4637 return this 4638 4639 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4640 if not this: 4641 return this 4642 4643 return self.expression(exp.Not, this=this) 4644 4645 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4646 index = self._index - 1 4647 negate = self._match(TokenType.NOT) 4648 4649 if self._match_text_seq("DISTINCT", "FROM"): 4650 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4651 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4652 4653 if self._match(TokenType.JSON): 4654 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4655 4656 if self._match_text_seq("WITH"): 4657 _with = True 4658 elif self._match_text_seq("WITHOUT"): 4659 _with = False 4660 else: 4661 _with = None 4662 4663 unique = self._match(TokenType.UNIQUE) 4664 self._match_text_seq("KEYS") 4665 expression: t.Optional[exp.Expression] = self.expression( 4666 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4667 ) 4668 else: 4669 expression = self._parse_primary() or self._parse_null() 4670 if not expression: 4671 self._retreat(index) 4672 return None 4673 4674 this = self.expression(exp.Is, this=this, expression=expression) 4675 return self.expression(exp.Not, this=this) if negate else this 4676 4677 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4678 unnest = self._parse_unnest(with_alias=False) 4679 if unnest: 4680 this = self.expression(exp.In, this=this, unnest=unnest) 4681 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4682 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4683 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4684 4685 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4686 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4687 else: 4688 this = self.expression(exp.In, this=this, expressions=expressions) 4689 4690 if matched_l_paren: 4691 self._match_r_paren(this) 4692 elif not self._match(TokenType.R_BRACKET, expression=this): 4693 self.raise_error("Expecting ]") 4694 else: 4695 this = self.expression(exp.In, this=this, field=self._parse_column()) 4696 4697 return this 4698 4699 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4700 low = self._parse_bitwise() 4701 self._match(TokenType.AND) 4702 high = self._parse_bitwise() 4703 return self.expression(exp.Between, this=this, low=low, high=high) 4704 4705 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4706 if not self._match(TokenType.ESCAPE): 4707 return this 4708 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4709 4710 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4711 index = self._index 4712 4713 if not self._match(TokenType.INTERVAL) and match_interval: 4714 return None 4715 4716 if self._match(TokenType.STRING, advance=False): 4717 this = self._parse_primary() 4718 else: 4719 this = self._parse_term() 4720 4721 if not 
this or ( 4722 isinstance(this, exp.Column) 4723 and not this.table 4724 and not this.this.quoted 4725 and this.name.upper() == "IS" 4726 ): 4727 self._retreat(index) 4728 return None 4729 4730 unit = self._parse_function() or ( 4731 not self._match(TokenType.ALIAS, advance=False) 4732 and self._parse_var(any_token=True, upper=True) 4733 ) 4734 4735 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4736 # each INTERVAL expression into this canonical form so it's easy to transpile 4737 if this and this.is_number: 4738 this = exp.Literal.string(this.to_py()) 4739 elif this and this.is_string: 4740 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4741 if parts and unit: 4742 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4743 unit = None 4744 self._retreat(self._index - 1) 4745 4746 if len(parts) == 1: 4747 this = exp.Literal.string(parts[0][0]) 4748 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4749 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4750 unit = self.expression( 4751 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4752 ) 4753 4754 interval = self.expression(exp.Interval, this=this, unit=unit) 4755 4756 index = self._index 4757 self._match(TokenType.PLUS) 4758 4759 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4760 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4761 return self.expression( 4762 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4763 ) 4764 4765 self._retreat(index) 4766 return interval 4767 4768 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4769 this = self._parse_term() 4770 4771 while True: 4772 if self._match_set(self.BITWISE): 4773 this = self.expression( 4774 self.BITWISE[self._prev.token_type], 4775 this=this, 4776 expression=self._parse_term(), 4777 ) 4778 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4779 this = self.expression( 4780 exp.DPipe, 4781 this=this, 4782 expression=self._parse_term(), 4783 safe=not self.dialect.STRICT_STRING_CONCAT, 4784 ) 4785 elif self._match(TokenType.DQMARK): 4786 this = self.expression( 4787 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4788 ) 4789 elif self._match_pair(TokenType.LT, TokenType.LT): 4790 this = self.expression( 4791 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4792 ) 4793 elif self._match_pair(TokenType.GT, TokenType.GT): 4794 this = self.expression( 4795 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4796 ) 4797 else: 4798 break 4799 4800 return this 4801 4802 def _parse_term(self) -> t.Optional[exp.Expression]: 4803 this = self._parse_factor() 4804 4805 while self._match_set(self.TERM): 4806 klass = self.TERM[self._prev.token_type] 4807 comments = self._prev_comments 4808 expression = self._parse_factor() 4809 4810 this = self.expression(klass, this=this, comments=comments, expression=expression) 4811 4812 if isinstance(this, exp.Collate): 4813 expr = this.expression 4814 4815 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4816 # fallback to Identifier / Var 4817 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4818 ident = expr.this 4819 if isinstance(ident, exp.Identifier): 4820 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4821 4822 return this 4823 4824 def _parse_factor(self) -> t.Optional[exp.Expression]: 4825 parse_method = 
self._parse_exponent if self.EXPONENT else self._parse_unary 4826 this = parse_method() 4827 4828 while self._match_set(self.FACTOR): 4829 klass = self.FACTOR[self._prev.token_type] 4830 comments = self._prev_comments 4831 expression = parse_method() 4832 4833 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4834 self._retreat(self._index - 1) 4835 return this 4836 4837 this = self.expression(klass, this=this, comments=comments, expression=expression) 4838 4839 if isinstance(this, exp.Div): 4840 this.args["typed"] = self.dialect.TYPED_DIVISION 4841 this.args["safe"] = self.dialect.SAFE_DIVISION 4842 4843 return this 4844 4845 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4846 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4847 4848 def _parse_unary(self) -> t.Optional[exp.Expression]: 4849 if self._match_set(self.UNARY_PARSERS): 4850 return self.UNARY_PARSERS[self._prev.token_type](self) 4851 return self._parse_at_time_zone(self._parse_type()) 4852 4853 def _parse_type( 4854 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4855 ) -> t.Optional[exp.Expression]: 4856 interval = parse_interval and self._parse_interval() 4857 if interval: 4858 return interval 4859 4860 index = self._index 4861 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4862 4863 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4864 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4865 if isinstance(data_type, exp.Cast): 4866 # This constructor can contain ops directly after it, for instance struct unnesting: 4867 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4868 return self._parse_column_ops(data_type) 4869 4870 if data_type: 4871 index2 = self._index 4872 this = self._parse_primary() 4873 4874 if isinstance(this, exp.Literal): 4875 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4876 if parser: 4877 return parser(self, this, data_type) 4878 4879 return self.expression(exp.Cast, this=this, to=data_type) 4880 4881 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4882 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4883 # 4884 # If the index difference here is greater than 1, that means the parser itself must have 4885 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4886 # 4887 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4888 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4889 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4890 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4891 # 4892 # In these cases, we don't really want to return the converted type, but instead retreat 4893 # and try to parse a Column or Identifier in the section below. 
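# --- Added illustration (not part of the original comment block above) -------
# Concretely, for the retreat logic described above: with a dialect whose
# TYPE_CONVERTERS expands a bare DECIMAL (Snowflake is the assumed example),
# the type keyword alone still round-trips with its default parameters:
#
#   import sqlglot
#
#   sqlglot.parse_one("SELECT CAST(x AS DECIMAL)", read="snowflake").sql("snowflake")
#   # -> 'SELECT CAST(x AS DECIMAL(38, 0))'
#
# whereas an identifier that merely looks like a type falls through to the
# column-parsing branch below instead of being swallowed as a DataType.
# ------------------------------------------------------------------------------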
4894 if data_type.expressions and index2 - index > 1: 4895 self._retreat(index2) 4896 return self._parse_column_ops(data_type) 4897 4898 self._retreat(index) 4899 4900 if fallback_to_identifier: 4901 return self._parse_id_var() 4902 4903 this = self._parse_column() 4904 return this and self._parse_column_ops(this) 4905 4906 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4907 this = self._parse_type() 4908 if not this: 4909 return None 4910 4911 if isinstance(this, exp.Column) and not this.table: 4912 this = exp.var(this.name.upper()) 4913 4914 return self.expression( 4915 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4916 ) 4917 4918 def _parse_types( 4919 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4920 ) -> t.Optional[exp.Expression]: 4921 index = self._index 4922 4923 this: t.Optional[exp.Expression] = None 4924 prefix = self._match_text_seq("SYSUDTLIB", ".") 4925 4926 if not self._match_set(self.TYPE_TOKENS): 4927 identifier = allow_identifiers and self._parse_id_var( 4928 any_token=False, tokens=(TokenType.VAR,) 4929 ) 4930 if isinstance(identifier, exp.Identifier): 4931 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4932 4933 if len(tokens) != 1: 4934 self.raise_error("Unexpected identifier", self._prev) 4935 4936 if tokens[0].token_type in self.TYPE_TOKENS: 4937 self._prev = tokens[0] 4938 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4939 type_name = identifier.name 4940 4941 while self._match(TokenType.DOT): 4942 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4943 4944 this = exp.DataType.build(type_name, udt=True) 4945 else: 4946 self._retreat(self._index - 1) 4947 return None 4948 else: 4949 return None 4950 4951 type_token = self._prev.token_type 4952 4953 if type_token == TokenType.PSEUDO_TYPE: 4954 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4955 4956 if type_token == TokenType.OBJECT_IDENTIFIER: 4957 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4958 4959 # https://materialize.com/docs/sql/types/map/ 4960 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4961 key_type = self._parse_types( 4962 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4963 ) 4964 if not self._match(TokenType.FARROW): 4965 self._retreat(index) 4966 return None 4967 4968 value_type = self._parse_types( 4969 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4970 ) 4971 if not self._match(TokenType.R_BRACKET): 4972 self._retreat(index) 4973 return None 4974 4975 return exp.DataType( 4976 this=exp.DataType.Type.MAP, 4977 expressions=[key_type, value_type], 4978 nested=True, 4979 prefix=prefix, 4980 ) 4981 4982 nested = type_token in self.NESTED_TYPE_TOKENS 4983 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4984 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4985 expressions = None 4986 maybe_func = False 4987 4988 if self._match(TokenType.L_PAREN): 4989 if is_struct: 4990 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4991 elif nested: 4992 expressions = self._parse_csv( 4993 lambda: self._parse_types( 4994 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4995 ) 4996 ) 4997 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4998 this = expressions[0] 4999 this.set("nullable", True) 5000 self._match_r_paren() 5001 return this 5002 elif type_token in self.ENUM_TYPE_TOKENS: 5003 
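# --- Added illustration (not part of the original source) --------------------
# _parse_types recurses for nested containers, so arbitrarily deep types build
# a single exp.DataType tree; the Materialize MAP[k => v] branch above is one
# such entry point. exp.DataType.build is the public shortcut for the same
# parsing path:
#
#   import sqlglot
#   from sqlglot import exp
#
#   dt = exp.DataType.build("ARRAY<STRUCT<a INT, b TEXT>>")
#   dt.this         # exp.DataType.Type.ARRAY
#   dt.expressions  # [the nested STRUCT<a INT, b TEXT> DataType]
#
#   sqlglot.parse_one("SELECT CAST(x AS MAP[TEXT => INT])", read="materialize")
# ------------------------------------------------------------------------------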
expressions = self._parse_csv(self._parse_equality) 5004 elif is_aggregate: 5005 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5006 any_token=False, tokens=(TokenType.VAR,) 5007 ) 5008 if not func_or_ident or not self._match(TokenType.COMMA): 5009 return None 5010 expressions = self._parse_csv( 5011 lambda: self._parse_types( 5012 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5013 ) 5014 ) 5015 expressions.insert(0, func_or_ident) 5016 else: 5017 expressions = self._parse_csv(self._parse_type_size) 5018 5019 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5020 if type_token == TokenType.VECTOR and len(expressions) == 2: 5021 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5022 5023 if not expressions or not self._match(TokenType.R_PAREN): 5024 self._retreat(index) 5025 return None 5026 5027 maybe_func = True 5028 5029 values: t.Optional[t.List[exp.Expression]] = None 5030 5031 if nested and self._match(TokenType.LT): 5032 if is_struct: 5033 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5034 else: 5035 expressions = self._parse_csv( 5036 lambda: self._parse_types( 5037 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5038 ) 5039 ) 5040 5041 if not self._match(TokenType.GT): 5042 self.raise_error("Expecting >") 5043 5044 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5045 values = self._parse_csv(self._parse_assignment) 5046 if not values and is_struct: 5047 values = None 5048 self._retreat(self._index - 1) 5049 else: 5050 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5051 5052 if type_token in self.TIMESTAMPS: 5053 if self._match_text_seq("WITH", "TIME", "ZONE"): 5054 maybe_func = False 5055 tz_type = ( 5056 exp.DataType.Type.TIMETZ 5057 if type_token in self.TIMES 5058 else exp.DataType.Type.TIMESTAMPTZ 5059 ) 5060 this = exp.DataType(this=tz_type, expressions=expressions) 5061 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5062 maybe_func = False 5063 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5064 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5065 maybe_func = False 5066 elif type_token == TokenType.INTERVAL: 5067 unit = self._parse_var(upper=True) 5068 if unit: 5069 if self._match_text_seq("TO"): 5070 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5071 5072 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5073 else: 5074 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5075 5076 if maybe_func and check_func: 5077 index2 = self._index 5078 peek = self._parse_string() 5079 5080 if not peek: 5081 self._retreat(index) 5082 return None 5083 5084 self._retreat(index2) 5085 5086 if not this: 5087 if self._match_text_seq("UNSIGNED"): 5088 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5089 if not unsigned_type_token: 5090 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5091 5092 type_token = unsigned_type_token or type_token 5093 5094 this = exp.DataType( 5095 this=exp.DataType.Type[type_token.value], 5096 expressions=expressions, 5097 nested=nested, 5098 prefix=prefix, 5099 ) 5100 5101 # Empty arrays/structs are allowed 5102 if values is not None: 5103 cls = exp.Struct if is_struct else exp.Array 5104 this = exp.cast(cls(expressions=values), this, copy=False) 5105 5106 elif expressions: 5107 this.set("expressions", 
expressions) 5108 5109 # https://materialize.com/docs/sql/types/list/#type-name 5110 while self._match(TokenType.LIST): 5111 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5112 5113 index = self._index 5114 5115 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5116 matched_array = self._match(TokenType.ARRAY) 5117 5118 while self._curr: 5119 datatype_token = self._prev.token_type 5120 matched_l_bracket = self._match(TokenType.L_BRACKET) 5121 5122 if (not matched_l_bracket and not matched_array) or ( 5123 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5124 ): 5125 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5126 # not to be confused with the fixed size array parsing 5127 break 5128 5129 matched_array = False 5130 values = self._parse_csv(self._parse_assignment) or None 5131 if ( 5132 values 5133 and not schema 5134 and ( 5135 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5136 ) 5137 ): 5138 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5139 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5140 self._retreat(index) 5141 break 5142 5143 this = exp.DataType( 5144 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5145 ) 5146 self._match(TokenType.R_BRACKET) 5147 5148 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5149 converter = self.TYPE_CONVERTERS.get(this.this) 5150 if converter: 5151 this = converter(t.cast(exp.DataType, this)) 5152 5153 return this 5154 5155 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5156 index = self._index 5157 5158 if ( 5159 self._curr 5160 and self._next 5161 and self._curr.token_type in self.TYPE_TOKENS 5162 and self._next.token_type in self.TYPE_TOKENS 5163 ): 5164 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5165 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5166 this = self._parse_id_var() 5167 else: 5168 this = ( 5169 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5170 or self._parse_id_var() 5171 ) 5172 5173 self._match(TokenType.COLON) 5174 5175 if ( 5176 type_required 5177 and not isinstance(this, exp.DataType) 5178 and not self._match_set(self.TYPE_TOKENS, advance=False) 5179 ): 5180 self._retreat(index) 5181 return self._parse_types() 5182 5183 return self._parse_column_def(this) 5184 5185 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5186 if not self._match_text_seq("AT", "TIME", "ZONE"): 5187 return this 5188 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5189 5190 def _parse_column(self) -> t.Optional[exp.Expression]: 5191 this = self._parse_column_reference() 5192 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5193 5194 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5195 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5196 5197 return column 5198 5199 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5200 this = self._parse_field() 5201 if ( 5202 not this 5203 and self._match(TokenType.VALUES, advance=False) 5204 and self.VALUES_FOLLOWED_BY_PAREN 5205 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5206 ): 5207 this = self._parse_id_var() 5208 5209 if isinstance(this, exp.Identifier): 5210 # We bubble up comments from the Identifier to the Column 5211 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5212 5213 return this 5214 5215 def _parse_colon_as_variant_extract( 5216 self, this: t.Optional[exp.Expression] 5217 ) -> t.Optional[exp.Expression]: 5218 casts = [] 5219 json_path = [] 5220 escape = None 5221 5222 while self._match(TokenType.COLON): 5223 start_index = self._index 5224 5225 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5226 path = self._parse_column_ops( 5227 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5228 ) 5229 5230 # The cast :: operator has a lower precedence than the extraction operator :, so 5231 # we rearrange the AST appropriately to avoid casting the JSON path 5232 while isinstance(path, exp.Cast): 5233 casts.append(path.to) 5234 path = path.this 5235 5236 if casts: 5237 dcolon_offset = next( 5238 i 5239 for i, t in enumerate(self._tokens[start_index:]) 5240 if t.token_type == TokenType.DCOLON 5241 ) 5242 end_token = self._tokens[start_index + dcolon_offset - 1] 5243 else: 5244 end_token = self._prev 5245 5246 if path: 5247 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5248 # it'll roundtrip to a string literal in GET_PATH 5249 if isinstance(path, exp.Identifier) and path.quoted: 5250 escape = True 5251 5252 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5253 5254 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5255 # Databricks transforms it back to the colon/dot notation 5256 if json_path: 5257 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5258 5259 if json_path_expr: 5260 json_path_expr.set("escape", escape) 5261 5262 this = self.expression( 5263 exp.JSONExtract, 5264 this=this, 5265 expression=json_path_expr, 5266 variant_extract=True, 5267 ) 5268 5269 while casts: 5270 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5271 5272 return this 5273 5274 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5275 return self._parse_types() 5276 5277 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5278 this = self._parse_bracket(this) 5279 5280 while self._match_set(self.COLUMN_OPERATORS): 5281 op_token = self._prev.token_type 5282 op = self.COLUMN_OPERATORS.get(op_token) 5283 5284 if op_token == TokenType.DCOLON: 5285 field = self._parse_dcolon() 5286 if not field: 5287 self.raise_error("Expected type") 5288 elif op and self._curr: 5289 field = self._parse_column_reference() or self._parse_bracket() 5290 else: 5291 field = self._parse_field(any_token=True, anonymous_func=True) 5292 5293 if isinstance(field, (exp.Func, exp.Window)) and this: 5294 # BQ & snowflake allow function calls like x.y.count(...), SAFE.SUBSTR(...) etc 5295 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5296 this = exp.replace_tree( 5297 this, 5298 lambda n: ( 5299 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5300 if n.table 5301 else n.this 5302 ) 5303 if isinstance(n, exp.Column) 5304 else n, 5305 ) 5306 5307 if op: 5308 this = op(self, this, field) 5309 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5310 this = self.expression( 5311 exp.Column, 5312 comments=this.comments, 5313 this=field, 5314 table=this.this, 5315 db=this.args.get("table"), 5316 catalog=this.args.get("db"), 5317 ) 5318 elif isinstance(field, exp.Window): 5319 # Move the exp.Dot's to the window's function 5320 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5321 field.set("this", window_func) 5322 this = field 5323 else: 5324 this = self.expression(exp.Dot, this=this, expression=field) 5325 5326 if field and field.comments: 5327 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5328 5329 this = self._parse_bracket(this) 5330 5331 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5332 5333 def _parse_primary(self) -> t.Optional[exp.Expression]: 5334 if self._match_set(self.PRIMARY_PARSERS): 5335 token_type = self._prev.token_type 5336 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5337 5338 if token_type == TokenType.STRING: 5339 expressions = [primary] 5340 while self._match(TokenType.STRING): 5341 expressions.append(exp.Literal.string(self._prev.text)) 5342 5343 if len(expressions) > 1: 5344 return self.expression(exp.Concat, expressions=expressions) 5345 5346 return primary 5347 5348 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5349 return exp.Literal.number(f"0.{self._prev.text}") 5350 5351 if 
self._match(TokenType.L_PAREN): 5352 comments = self._prev_comments 5353 query = self._parse_select() 5354 5355 if query: 5356 expressions = [query] 5357 else: 5358 expressions = self._parse_expressions() 5359 5360 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5361 5362 if not this and self._match(TokenType.R_PAREN, advance=False): 5363 this = self.expression(exp.Tuple) 5364 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5365 this = self._parse_subquery(this=this, parse_alias=False) 5366 elif isinstance(this, exp.Subquery): 5367 this = self._parse_subquery( 5368 this=self._parse_set_operations(this), parse_alias=False 5369 ) 5370 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5371 this = self.expression(exp.Tuple, expressions=expressions) 5372 else: 5373 this = self.expression(exp.Paren, this=this) 5374 5375 if this: 5376 this.add_comments(comments) 5377 5378 self._match_r_paren(expression=this) 5379 return this 5380 5381 return None 5382 5383 def _parse_field( 5384 self, 5385 any_token: bool = False, 5386 tokens: t.Optional[t.Collection[TokenType]] = None, 5387 anonymous_func: bool = False, 5388 ) -> t.Optional[exp.Expression]: 5389 if anonymous_func: 5390 field = ( 5391 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5392 or self._parse_primary() 5393 ) 5394 else: 5395 field = self._parse_primary() or self._parse_function( 5396 anonymous=anonymous_func, any_token=any_token 5397 ) 5398 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5399 5400 def _parse_function( 5401 self, 5402 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5403 anonymous: bool = False, 5404 optional_parens: bool = True, 5405 any_token: bool = False, 5406 ) -> t.Optional[exp.Expression]: 5407 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5408 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5409 fn_syntax = False 5410 if ( 5411 self._match(TokenType.L_BRACE, advance=False) 5412 and self._next 5413 and self._next.text.upper() == "FN" 5414 ): 5415 self._advance(2) 5416 fn_syntax = True 5417 5418 func = self._parse_function_call( 5419 functions=functions, 5420 anonymous=anonymous, 5421 optional_parens=optional_parens, 5422 any_token=any_token, 5423 ) 5424 5425 if fn_syntax: 5426 self._match(TokenType.R_BRACE) 5427 5428 return func 5429 5430 def _parse_function_call( 5431 self, 5432 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5433 anonymous: bool = False, 5434 optional_parens: bool = True, 5435 any_token: bool = False, 5436 ) -> t.Optional[exp.Expression]: 5437 if not self._curr: 5438 return None 5439 5440 comments = self._curr.comments 5441 token_type = self._curr.token_type 5442 this = self._curr.text 5443 upper = this.upper() 5444 5445 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5446 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5447 self._advance() 5448 return self._parse_window(parser(self)) 5449 5450 if not self._next or self._next.token_type != TokenType.L_PAREN: 5451 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5452 self._advance() 5453 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5454 5455 return None 5456 5457 if any_token: 5458 if token_type in self.RESERVED_TOKENS: 5459 return None 5460 elif token_type not in self.FUNC_TOKENS: 5461 return None 5462 5463 self._advance(2) 5464 5465 parser = self.FUNCTION_PARSERS.get(upper) 5466 if parser and not anonymous: 5467 this = parser(self) 
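# --- Added illustration (not part of the original source) --------------------
# Function dispatch above tries NO_PAREN_FUNCTION_PARSERS, then FUNCTION_PARSERS,
# then the FUNCTIONS registry; an unrecognized name becomes exp.Anonymous, and
# the ODBC escape {fn ...} is simply unwrapped around the call. For example:
#
#   import sqlglot
#   from sqlglot import exp
#
#   sqlglot.parse_one("SELECT COALESCE(a, b)").find(exp.Coalesce)   # known builder
#   sqlglot.parse_one("SELECT MY_UDF(a, b)").find(exp.Anonymous)    # unknown name
#   sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}", read="mysql") # ODBC escape
# ------------------------------------------------------------------------------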
5468 else: 5469 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5470 5471 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5472 this = self.expression( 5473 subquery_predicate, comments=comments, this=self._parse_select() 5474 ) 5475 self._match_r_paren() 5476 return this 5477 5478 if functions is None: 5479 functions = self.FUNCTIONS 5480 5481 function = functions.get(upper) 5482 known_function = function and not anonymous 5483 5484 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5485 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5486 5487 post_func_comments = self._curr and self._curr.comments 5488 if known_function and post_func_comments: 5489 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5490 # call we'll construct it as exp.Anonymous, even if it's "known" 5491 if any( 5492 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5493 for comment in post_func_comments 5494 ): 5495 known_function = False 5496 5497 if alias and known_function: 5498 args = self._kv_to_prop_eq(args) 5499 5500 if known_function: 5501 func_builder = t.cast(t.Callable, function) 5502 5503 if "dialect" in func_builder.__code__.co_varnames: 5504 func = func_builder(args, dialect=self.dialect) 5505 else: 5506 func = func_builder(args) 5507 5508 func = self.validate_expression(func, args) 5509 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5510 func.meta["name"] = this 5511 5512 this = func 5513 else: 5514 if token_type == TokenType.IDENTIFIER: 5515 this = exp.Identifier(this=this, quoted=True) 5516 this = self.expression(exp.Anonymous, this=this, expressions=args) 5517 5518 if isinstance(this, exp.Expression): 5519 this.add_comments(comments) 5520 5521 self._match_r_paren(this) 5522 return self._parse_window(this) 5523 5524 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5525 return expression 5526 5527 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5528 transformed = [] 5529 5530 for index, e in enumerate(expressions): 5531 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5532 if isinstance(e, exp.Alias): 5533 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5534 5535 if not isinstance(e, exp.PropertyEQ): 5536 e = self.expression( 5537 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5538 ) 5539 5540 if isinstance(e.this, exp.Column): 5541 e.this.replace(e.this.this) 5542 else: 5543 e = self._to_prop_eq(e, index) 5544 5545 transformed.append(e) 5546 5547 return transformed 5548 5549 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5550 return self._parse_statement() 5551 5552 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5553 return self._parse_column_def(self._parse_id_var()) 5554 5555 def _parse_user_defined_function( 5556 self, kind: t.Optional[TokenType] = None 5557 ) -> t.Optional[exp.Expression]: 5558 this = self._parse_id_var() 5559 5560 while self._match(TokenType.DOT): 5561 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5562 5563 if not self._match(TokenType.L_PAREN): 5564 return this 5565 5566 expressions = self._parse_csv(self._parse_function_parameter) 5567 self._match_r_paren() 5568 return self.expression( 5569 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5570 ) 5571 5572 def _parse_introducer(self, token: Token) -> exp.Introducer | 
exp.Identifier: 5573 literal = self._parse_primary() 5574 if literal: 5575 return self.expression(exp.Introducer, this=token.text, expression=literal) 5576 5577 return self.expression(exp.Identifier, this=token.text) 5578 5579 def _parse_session_parameter(self) -> exp.SessionParameter: 5580 kind = None 5581 this = self._parse_id_var() or self._parse_primary() 5582 5583 if this and self._match(TokenType.DOT): 5584 kind = this.name 5585 this = self._parse_var() or self._parse_primary() 5586 5587 return self.expression(exp.SessionParameter, this=this, kind=kind) 5588 5589 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5590 return self._parse_id_var() 5591 5592 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5593 index = self._index 5594 5595 if self._match(TokenType.L_PAREN): 5596 expressions = t.cast( 5597 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5598 ) 5599 5600 if not self._match(TokenType.R_PAREN): 5601 self._retreat(index) 5602 else: 5603 expressions = [self._parse_lambda_arg()] 5604 5605 if self._match_set(self.LAMBDAS): 5606 return self.LAMBDAS[self._prev.token_type](self, expressions) 5607 5608 self._retreat(index) 5609 5610 this: t.Optional[exp.Expression] 5611 5612 if self._match(TokenType.DISTINCT): 5613 this = self.expression( 5614 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5615 ) 5616 else: 5617 this = self._parse_select_or_expression(alias=alias) 5618 5619 return self._parse_limit( 5620 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5621 ) 5622 5623 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5624 index = self._index 5625 if not self._match(TokenType.L_PAREN): 5626 return this 5627 5628 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5629 # expr can be of both types 5630 if self._match_set(self.SELECT_START_TOKENS): 5631 self._retreat(index) 5632 return this 5633 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5634 self._match_r_paren() 5635 return self.expression(exp.Schema, this=this, expressions=args) 5636 5637 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5638 return self._parse_column_def(self._parse_field(any_token=True)) 5639 5640 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5641 # column defs are not really columns, they're identifiers 5642 if isinstance(this, exp.Column): 5643 this = this.this 5644 5645 kind = self._parse_types(schema=True) 5646 5647 if self._match_text_seq("FOR", "ORDINALITY"): 5648 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5649 5650 constraints: t.List[exp.Expression] = [] 5651 5652 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5653 ("ALIAS", "MATERIALIZED") 5654 ): 5655 persisted = self._prev.text.upper() == "MATERIALIZED" 5656 constraint_kind = exp.ComputedColumnConstraint( 5657 this=self._parse_assignment(), 5658 persisted=persisted or self._match_text_seq("PERSISTED"), 5659 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5660 ) 5661 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5662 elif ( 5663 kind 5664 and self._match(TokenType.ALIAS, advance=False) 5665 and ( 5666 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 5667 or (self._next and self._next.token_type == TokenType.L_PAREN) 5668 ) 5669 ): 5670 self._advance() 5671 constraints.append( 5672 self.expression( 5673 exp.ColumnConstraint, 5674 kind=exp.TransformColumnConstraint(this=self._parse_disjunction()), 5675 ) 5676 ) 5677 5678 while True: 5679 constraint = self._parse_column_constraint() 5680 if not constraint: 5681 break 5682 constraints.append(constraint) 5683 5684 if not kind and not constraints: 5685 return this 5686 5687 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5688 5689 def _parse_auto_increment( 5690 self, 5691 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5692 start = None 5693 increment = None 5694 5695 if self._match(TokenType.L_PAREN, advance=False): 5696 args = self._parse_wrapped_csv(self._parse_bitwise) 5697 start = seq_get(args, 0) 5698 increment = seq_get(args, 1) 5699 elif self._match_text_seq("START"): 5700 start = self._parse_bitwise() 5701 self._match_text_seq("INCREMENT") 5702 increment = self._parse_bitwise() 5703 5704 if start and increment: 5705 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5706 5707 return exp.AutoIncrementColumnConstraint() 5708 5709 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5710 if not self._match_text_seq("REFRESH"): 5711 self._retreat(self._index - 1) 5712 return None 5713 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5714 5715 def _parse_compress(self) -> exp.CompressColumnConstraint: 5716 if self._match(TokenType.L_PAREN, advance=False): 5717 return self.expression( 5718 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5719 ) 5720 5721 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5722 5723 def _parse_generated_as_identity( 5724 self, 5725 ) -> ( 5726 exp.GeneratedAsIdentityColumnConstraint 5727 | exp.ComputedColumnConstraint 5728 
| exp.GeneratedAsRowColumnConstraint 5729 ): 5730 if self._match_text_seq("BY", "DEFAULT"): 5731 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5732 this = self.expression( 5733 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5734 ) 5735 else: 5736 self._match_text_seq("ALWAYS") 5737 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5738 5739 self._match(TokenType.ALIAS) 5740 5741 if self._match_text_seq("ROW"): 5742 start = self._match_text_seq("START") 5743 if not start: 5744 self._match(TokenType.END) 5745 hidden = self._match_text_seq("HIDDEN") 5746 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5747 5748 identity = self._match_text_seq("IDENTITY") 5749 5750 if self._match(TokenType.L_PAREN): 5751 if self._match(TokenType.START_WITH): 5752 this.set("start", self._parse_bitwise()) 5753 if self._match_text_seq("INCREMENT", "BY"): 5754 this.set("increment", self._parse_bitwise()) 5755 if self._match_text_seq("MINVALUE"): 5756 this.set("minvalue", self._parse_bitwise()) 5757 if self._match_text_seq("MAXVALUE"): 5758 this.set("maxvalue", self._parse_bitwise()) 5759 5760 if self._match_text_seq("CYCLE"): 5761 this.set("cycle", True) 5762 elif self._match_text_seq("NO", "CYCLE"): 5763 this.set("cycle", False) 5764 5765 if not identity: 5766 this.set("expression", self._parse_range()) 5767 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5768 args = self._parse_csv(self._parse_bitwise) 5769 this.set("start", seq_get(args, 0)) 5770 this.set("increment", seq_get(args, 1)) 5771 5772 self._match_r_paren() 5773 5774 return this 5775 5776 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5777 self._match_text_seq("LENGTH") 5778 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5779 5780 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5781 if self._match_text_seq("NULL"): 5782 return self.expression(exp.NotNullColumnConstraint) 5783 if self._match_text_seq("CASESPECIFIC"): 5784 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5785 if self._match_text_seq("FOR", "REPLICATION"): 5786 return self.expression(exp.NotForReplicationColumnConstraint) 5787 5788 # Unconsume the `NOT` token 5789 self._retreat(self._index - 1) 5790 return None 5791 5792 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5793 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 5794 5795 procedure_option_follows = ( 5796 self._match(TokenType.WITH, advance=False) 5797 and self._next 5798 and self._next.text.upper() in self.PROCEDURE_OPTIONS 5799 ) 5800 5801 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 5802 return self.expression( 5803 exp.ColumnConstraint, 5804 this=this, 5805 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5806 ) 5807 5808 return this 5809 5810 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5811 if not self._match(TokenType.CONSTRAINT): 5812 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5813 5814 return self.expression( 5815 exp.Constraint, 5816 this=self._parse_id_var(), 5817 expressions=self._parse_unnamed_constraints(), 5818 ) 5819 5820 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5821 constraints = [] 5822 while True: 5823 constraint = self._parse_unnamed_constraint() or self._parse_function() 5824 if not constraint: 5825 break 5826 
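# --- Added illustration (not part of the original source) --------------------
# _parse_column_def plus the constraint loop above attach every recognized
# constraint to one exp.ColumnDef. A sketch, with the Postgres reader as the
# assumed example dialect:
#
#   import sqlglot
#   from sqlglot import exp
#
#   ddl = """CREATE TABLE t (
#       id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2) PRIMARY KEY
#   )"""
#   col = sqlglot.parse_one(ddl, read="postgres").find(exp.ColumnDef)
#   col.args["constraints"]  # identity constraint (start/increment set above) + PK
# ------------------------------------------------------------------------------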
constraints.append(constraint) 5827 5828 return constraints 5829 5830 def _parse_unnamed_constraint( 5831 self, constraints: t.Optional[t.Collection[str]] = None 5832 ) -> t.Optional[exp.Expression]: 5833 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5834 constraints or self.CONSTRAINT_PARSERS 5835 ): 5836 return None 5837 5838 constraint = self._prev.text.upper() 5839 if constraint not in self.CONSTRAINT_PARSERS: 5840 self.raise_error(f"No parser found for schema constraint {constraint}.") 5841 5842 return self.CONSTRAINT_PARSERS[constraint](self) 5843 5844 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5845 return self._parse_id_var(any_token=False) 5846 5847 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5848 self._match_text_seq("KEY") 5849 return self.expression( 5850 exp.UniqueColumnConstraint, 5851 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5852 this=self._parse_schema(self._parse_unique_key()), 5853 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5854 on_conflict=self._parse_on_conflict(), 5855 ) 5856 5857 def _parse_key_constraint_options(self) -> t.List[str]: 5858 options = [] 5859 while True: 5860 if not self._curr: 5861 break 5862 5863 if self._match(TokenType.ON): 5864 action = None 5865 on = self._advance_any() and self._prev.text 5866 5867 if self._match_text_seq("NO", "ACTION"): 5868 action = "NO ACTION" 5869 elif self._match_text_seq("CASCADE"): 5870 action = "CASCADE" 5871 elif self._match_text_seq("RESTRICT"): 5872 action = "RESTRICT" 5873 elif self._match_pair(TokenType.SET, TokenType.NULL): 5874 action = "SET NULL" 5875 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5876 action = "SET DEFAULT" 5877 else: 5878 self.raise_error("Invalid key constraint") 5879 5880 options.append(f"ON {on} {action}") 5881 else: 5882 var = self._parse_var_from_options( 5883 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5884 ) 5885 if not var: 5886 break 5887 options.append(var.name) 5888 5889 return options 5890 5891 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5892 if match and not self._match(TokenType.REFERENCES): 5893 return None 5894 5895 expressions = None 5896 this = self._parse_table(schema=True) 5897 options = self._parse_key_constraint_options() 5898 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5899 5900 def _parse_foreign_key(self) -> exp.ForeignKey: 5901 expressions = self._parse_wrapped_id_vars() 5902 reference = self._parse_references() 5903 options = {} 5904 5905 while self._match(TokenType.ON): 5906 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5907 self.raise_error("Expected DELETE or UPDATE") 5908 5909 kind = self._prev.text.lower() 5910 5911 if self._match_text_seq("NO", "ACTION"): 5912 action = "NO ACTION" 5913 elif self._match(TokenType.SET): 5914 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5915 action = "SET " + self._prev.text.upper() 5916 else: 5917 self._advance() 5918 action = self._prev.text.upper() 5919 5920 options[kind] = action 5921 5922 return self.expression( 5923 exp.ForeignKey, 5924 expressions=expressions, 5925 reference=reference, 5926 **options, # type: ignore 5927 ) 5928 5929 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5930 return self._parse_ordered() or self._parse_field() 5931 5932 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5933 if not 
self._match(TokenType.TIMESTAMP_SNAPSHOT): 5934 self._retreat(self._index - 1) 5935 return None 5936 5937 id_vars = self._parse_wrapped_id_vars() 5938 return self.expression( 5939 exp.PeriodForSystemTimeConstraint, 5940 this=seq_get(id_vars, 0), 5941 expression=seq_get(id_vars, 1), 5942 ) 5943 5944 def _parse_primary_key( 5945 self, wrapped_optional: bool = False, in_props: bool = False 5946 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5947 desc = ( 5948 self._match_set((TokenType.ASC, TokenType.DESC)) 5949 and self._prev.token_type == TokenType.DESC 5950 ) 5951 5952 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5953 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5954 5955 expressions = self._parse_wrapped_csv( 5956 self._parse_primary_key_part, optional=wrapped_optional 5957 ) 5958 options = self._parse_key_constraint_options() 5959 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5960 5961 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5962 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5963 5964 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5965 """ 5966 Parses a datetime column in ODBC format. We parse the column into the corresponding 5967 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5968 same as we did for `DATE('yyyy-mm-dd')`. 5969 5970 Reference: 5971 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5972 """ 5973 self._match(TokenType.VAR) 5974 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5975 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5976 if not self._match(TokenType.R_BRACE): 5977 self.raise_error("Expected }") 5978 return expression 5979 5980 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5981 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5982 return this 5983 5984 bracket_kind = self._prev.token_type 5985 if ( 5986 bracket_kind == TokenType.L_BRACE 5987 and self._curr 5988 and self._curr.token_type == TokenType.VAR 5989 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5990 ): 5991 return self._parse_odbc_datetime_literal() 5992 5993 expressions = self._parse_csv( 5994 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5995 ) 5996 5997 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5998 self.raise_error("Expected ]") 5999 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6000 self.raise_error("Expected }") 6001 6002 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6003 if bracket_kind == TokenType.L_BRACE: 6004 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 6005 elif not this: 6006 this = build_array_constructor( 6007 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6008 ) 6009 else: 6010 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6011 if constructor_type: 6012 return build_array_constructor( 6013 constructor_type, 6014 args=expressions, 6015 bracket_kind=bracket_kind, 6016 dialect=self.dialect, 6017 ) 6018 6019 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 6020 this = self.expression(exp.Bracket, this=this, expressions=expressions) 6021 6022 
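# --- Added illustration (not part of the original source) --------------------
# _parse_bracket above covers three shapes: {d/t/ts '...'} ODBC datetime
# literals, brace-struct literals, and ordinary subscripts (index-adjusted via
# the dialect's INDEX_OFFSET). DuckDB is the assumed example for the latter two:
#
#   import sqlglot
#   from sqlglot import exp
#
#   sqlglot.parse_one("SELECT {d '2000-01-01'}")                      # ODBC date literal
#   sqlglot.parse_one("SELECT {'a': 1}", read="duckdb").find(exp.Struct)
#   sqlglot.parse_one("SELECT arr[1]", read="duckdb").find(exp.Bracket)
# ------------------------------------------------------------------------------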
self._add_comments(this) 6023 return self._parse_bracket(this) 6024 6025 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6026 if self._match(TokenType.COLON): 6027 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6028 return this 6029 6030 def _parse_case(self) -> t.Optional[exp.Expression]: 6031 ifs = [] 6032 default = None 6033 6034 comments = self._prev_comments 6035 expression = self._parse_assignment() 6036 6037 while self._match(TokenType.WHEN): 6038 this = self._parse_assignment() 6039 self._match(TokenType.THEN) 6040 then = self._parse_assignment() 6041 ifs.append(self.expression(exp.If, this=this, true=then)) 6042 6043 if self._match(TokenType.ELSE): 6044 default = self._parse_assignment() 6045 6046 if not self._match(TokenType.END): 6047 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6048 default = exp.column("interval") 6049 else: 6050 self.raise_error("Expected END after CASE", self._prev) 6051 6052 return self.expression( 6053 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6054 ) 6055 6056 def _parse_if(self) -> t.Optional[exp.Expression]: 6057 if self._match(TokenType.L_PAREN): 6058 args = self._parse_csv(self._parse_assignment) 6059 this = self.validate_expression(exp.If.from_arg_list(args), args) 6060 self._match_r_paren() 6061 else: 6062 index = self._index - 1 6063 6064 if self.NO_PAREN_IF_COMMANDS and index == 0: 6065 return self._parse_as_command(self._prev) 6066 6067 condition = self._parse_assignment() 6068 6069 if not condition: 6070 self._retreat(index) 6071 return None 6072 6073 self._match(TokenType.THEN) 6074 true = self._parse_assignment() 6075 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6076 self._match(TokenType.END) 6077 this = self.expression(exp.If, this=condition, true=true, false=false) 6078 6079 return this 6080 6081 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6082 if not self._match_text_seq("VALUE", "FOR"): 6083 self._retreat(self._index - 1) 6084 return None 6085 6086 return self.expression( 6087 exp.NextValueFor, 6088 this=self._parse_column(), 6089 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6090 ) 6091 6092 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6093 this = self._parse_function() or self._parse_var_or_string(upper=True) 6094 6095 if self._match(TokenType.FROM): 6096 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6097 6098 if not self._match(TokenType.COMMA): 6099 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6100 6101 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6102 6103 def _parse_gap_fill(self) -> exp.GapFill: 6104 self._match(TokenType.TABLE) 6105 this = self._parse_table() 6106 6107 self._match(TokenType.COMMA) 6108 args = [this, *self._parse_csv(self._parse_lambda)] 6109 6110 gap_fill = exp.GapFill.from_arg_list(args) 6111 return self.validate_expression(gap_fill, args) 6112 6113 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6114 this = self._parse_assignment() 6115 6116 if not self._match(TokenType.ALIAS): 6117 if self._match(TokenType.COMMA): 6118 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6119 6120 self.raise_error("Expected AS after CAST") 6121 6122 fmt = None 6123 to = self._parse_types() 6124 6125 default = self._match(TokenType.DEFAULT) 6126 if 
default: 6127 default = self._parse_bitwise() 6128 self._match_text_seq("ON", "CONVERSION", "ERROR") 6129 6130 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6131 fmt_string = self._parse_string() 6132 fmt = self._parse_at_time_zone(fmt_string) 6133 6134 if not to: 6135 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6136 if to.this in exp.DataType.TEMPORAL_TYPES: 6137 this = self.expression( 6138 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6139 this=this, 6140 format=exp.Literal.string( 6141 format_time( 6142 fmt_string.this if fmt_string else "", 6143 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6144 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6145 ) 6146 ), 6147 safe=safe, 6148 ) 6149 6150 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6151 this.set("zone", fmt.args["zone"]) 6152 return this 6153 elif not to: 6154 self.raise_error("Expected TYPE after CAST") 6155 elif isinstance(to, exp.Identifier): 6156 to = exp.DataType.build(to.name, udt=True) 6157 elif to.this == exp.DataType.Type.CHAR: 6158 if self._match(TokenType.CHARACTER_SET): 6159 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6160 6161 return self.expression( 6162 exp.Cast if strict else exp.TryCast, 6163 this=this, 6164 to=to, 6165 format=fmt, 6166 safe=safe, 6167 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6168 default=default, 6169 ) 6170 6171 def _parse_string_agg(self) -> exp.GroupConcat: 6172 if self._match(TokenType.DISTINCT): 6173 args: t.List[t.Optional[exp.Expression]] = [ 6174 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6175 ] 6176 if self._match(TokenType.COMMA): 6177 args.extend(self._parse_csv(self._parse_assignment)) 6178 else: 6179 args = self._parse_csv(self._parse_assignment) # type: ignore 6180 6181 if self._match_text_seq("ON", "OVERFLOW"): 6182 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6183 if self._match_text_seq("ERROR"): 6184 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6185 else: 6186 self._match_text_seq("TRUNCATE") 6187 on_overflow = self.expression( 6188 exp.OverflowTruncateBehavior, 6189 this=self._parse_string(), 6190 with_count=( 6191 self._match_text_seq("WITH", "COUNT") 6192 or not self._match_text_seq("WITHOUT", "COUNT") 6193 ), 6194 ) 6195 else: 6196 on_overflow = None 6197 6198 index = self._index 6199 if not self._match(TokenType.R_PAREN) and args: 6200 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6201 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6202 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 6203 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6204 6205 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6206 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6207 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
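        # A hedged transpilation sketch of why the manual parse matters: both forms
        # below are expected to normalize to the same exp.GroupConcat shape, so either
        # can be rendered as MySQL's GROUP_CONCAT(x ORDER BY x SEPARATOR ','):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT STRING_AGG(x, ',' ORDER BY x) FROM t", read="postgres", write="mysql")
        #   >>> sqlglot.transpile("SELECT LISTAGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t", read="trino", write="mysql")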
6208 if not self._match_text_seq("WITHIN", "GROUP"): 6209 self._retreat(index) 6210 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6211 6212 # The corresponding match_r_paren will be called in parse_function (caller) 6213 self._match_l_paren() 6214 6215 return self.expression( 6216 exp.GroupConcat, 6217 this=self._parse_order(this=seq_get(args, 0)), 6218 separator=seq_get(args, 1), 6219 on_overflow=on_overflow, 6220 ) 6221 6222 def _parse_convert( 6223 self, strict: bool, safe: t.Optional[bool] = None 6224 ) -> t.Optional[exp.Expression]: 6225 this = self._parse_bitwise() 6226 6227 if self._match(TokenType.USING): 6228 to: t.Optional[exp.Expression] = self.expression( 6229 exp.CharacterSet, this=self._parse_var() 6230 ) 6231 elif self._match(TokenType.COMMA): 6232 to = self._parse_types() 6233 else: 6234 to = None 6235 6236 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 6237 6238 def _parse_xml_table(self) -> exp.XMLTable: 6239 namespaces = None 6240 passing = None 6241 columns = None 6242 6243 if self._match_text_seq("XMLNAMESPACES", "("): 6244 namespaces = self._parse_xml_namespace() 6245 self._match_text_seq(")", ",") 6246 6247 this = self._parse_string() 6248 6249 if self._match_text_seq("PASSING"): 6250 # The BY VALUE keywords are optional and are provided for semantic clarity 6251 self._match_text_seq("BY", "VALUE") 6252 passing = self._parse_csv(self._parse_column) 6253 6254 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6255 6256 if self._match_text_seq("COLUMNS"): 6257 columns = self._parse_csv(self._parse_field_def) 6258 6259 return self.expression( 6260 exp.XMLTable, 6261 this=this, 6262 namespaces=namespaces, 6263 passing=passing, 6264 columns=columns, 6265 by_ref=by_ref, 6266 ) 6267 6268 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6269 namespaces = [] 6270 6271 while True: 6272 if self._match(TokenType.DEFAULT): 6273 uri = self._parse_string() 6274 else: 6275 uri = self._parse_alias(self._parse_string()) 6276 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6277 if not self._match(TokenType.COMMA): 6278 break 6279 6280 return namespaces 6281 6282 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 6283 """ 6284 There are generally two variants of the DECODE function: 6285 6286 - DECODE(bin, charset) 6287 - DECODE(expression, search, result [, search, result] ... [, default]) 6288 6289 The second variant will always be parsed into a CASE expression. Note that NULL 6290 needs special treatment, since we need to explicitly check for it with `IS NULL`, 6291 instead of relying on pattern matching. 
6292 """ 6293 args = self._parse_csv(self._parse_assignment) 6294 6295 if len(args) < 3: 6296 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6297 6298 expression, *expressions = args 6299 if not expression: 6300 return None 6301 6302 ifs = [] 6303 for search, result in zip(expressions[::2], expressions[1::2]): 6304 if not search or not result: 6305 return None 6306 6307 if isinstance(search, exp.Literal): 6308 ifs.append( 6309 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 6310 ) 6311 elif isinstance(search, exp.Null): 6312 ifs.append( 6313 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 6314 ) 6315 else: 6316 cond = exp.or_( 6317 exp.EQ(this=expression.copy(), expression=search), 6318 exp.and_( 6319 exp.Is(this=expression.copy(), expression=exp.Null()), 6320 exp.Is(this=search.copy(), expression=exp.Null()), 6321 copy=False, 6322 ), 6323 copy=False, 6324 ) 6325 ifs.append(exp.If(this=cond, true=result)) 6326 6327 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 6328 6329 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6330 self._match_text_seq("KEY") 6331 key = self._parse_column() 6332 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6333 self._match_text_seq("VALUE") 6334 value = self._parse_bitwise() 6335 6336 if not key and not value: 6337 return None 6338 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6339 6340 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6341 if not this or not self._match_text_seq("FORMAT", "JSON"): 6342 return this 6343 6344 return self.expression(exp.FormatJson, this=this) 6345 6346 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6347 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6348 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6349 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6350 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6351 else: 6352 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6353 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6354 6355 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6356 6357 if not empty and not error and not null: 6358 return None 6359 6360 return self.expression( 6361 exp.OnCondition, 6362 empty=empty, 6363 error=error, 6364 null=null, 6365 ) 6366 6367 def _parse_on_handling( 6368 self, on: str, *values: str 6369 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6370 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6371 for value in values: 6372 if self._match_text_seq(value, "ON", on): 6373 return f"{value} ON {on}" 6374 6375 index = self._index 6376 if self._match(TokenType.DEFAULT): 6377 default_value = self._parse_bitwise() 6378 if self._match_text_seq("ON", on): 6379 return default_value 6380 6381 self._retreat(index) 6382 6383 return None 6384 6385 @t.overload 6386 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6387 6388 @t.overload 6389 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
6390 6391 def _parse_json_object(self, agg=False): 6392 star = self._parse_star() 6393 expressions = ( 6394 [star] 6395 if star 6396 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6397 ) 6398 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6399 6400 unique_keys = None 6401 if self._match_text_seq("WITH", "UNIQUE"): 6402 unique_keys = True 6403 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6404 unique_keys = False 6405 6406 self._match_text_seq("KEYS") 6407 6408 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6409 self._parse_type() 6410 ) 6411 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6412 6413 return self.expression( 6414 exp.JSONObjectAgg if agg else exp.JSONObject, 6415 expressions=expressions, 6416 null_handling=null_handling, 6417 unique_keys=unique_keys, 6418 return_type=return_type, 6419 encoding=encoding, 6420 ) 6421 6422 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6423 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6424 if not self._match_text_seq("NESTED"): 6425 this = self._parse_id_var() 6426 kind = self._parse_types(allow_identifiers=False) 6427 nested = None 6428 else: 6429 this = None 6430 kind = None 6431 nested = True 6432 6433 path = self._match_text_seq("PATH") and self._parse_string() 6434 nested_schema = nested and self._parse_json_schema() 6435 6436 return self.expression( 6437 exp.JSONColumnDef, 6438 this=this, 6439 kind=kind, 6440 path=path, 6441 nested_schema=nested_schema, 6442 ) 6443 6444 def _parse_json_schema(self) -> exp.JSONSchema: 6445 self._match_text_seq("COLUMNS") 6446 return self.expression( 6447 exp.JSONSchema, 6448 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6449 ) 6450 6451 def _parse_json_table(self) -> exp.JSONTable: 6452 this = self._parse_format_json(self._parse_bitwise()) 6453 path = self._match(TokenType.COMMA) and self._parse_string() 6454 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6455 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6456 schema = self._parse_json_schema() 6457 6458 return exp.JSONTable( 6459 this=this, 6460 schema=schema, 6461 path=path, 6462 error_handling=error_handling, 6463 empty_handling=empty_handling, 6464 ) 6465 6466 def _parse_match_against(self) -> exp.MatchAgainst: 6467 expressions = self._parse_csv(self._parse_column) 6468 6469 self._match_text_seq(")", "AGAINST", "(") 6470 6471 this = self._parse_string() 6472 6473 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6474 modifier = "IN NATURAL LANGUAGE MODE" 6475 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6476 modifier = f"{modifier} WITH QUERY EXPANSION" 6477 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6478 modifier = "IN BOOLEAN MODE" 6479 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6480 modifier = "WITH QUERY EXPANSION" 6481 else: 6482 modifier = None 6483 6484 return self.expression( 6485 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6486 ) 6487 6488 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6489 def _parse_open_json(self) -> exp.OpenJSON: 6490 this = self._parse_bitwise() 6491 path = self._match(TokenType.COMMA) and self._parse_string() 6492 6493 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6494 this = self._parse_field(any_token=True) 6495 kind = self._parse_types() 6496 path = 
self._parse_string() 6497 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6498 6499 return self.expression( 6500 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6501 ) 6502 6503 expressions = None 6504 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6505 self._match_l_paren() 6506 expressions = self._parse_csv(_parse_open_json_column_def) 6507 6508 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6509 6510 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6511 args = self._parse_csv(self._parse_bitwise) 6512 6513 if self._match(TokenType.IN): 6514 return self.expression( 6515 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6516 ) 6517 6518 if haystack_first: 6519 haystack = seq_get(args, 0) 6520 needle = seq_get(args, 1) 6521 else: 6522 haystack = seq_get(args, 1) 6523 needle = seq_get(args, 0) 6524 6525 return self.expression( 6526 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6527 ) 6528 6529 def _parse_predict(self) -> exp.Predict: 6530 self._match_text_seq("MODEL") 6531 this = self._parse_table() 6532 6533 self._match(TokenType.COMMA) 6534 self._match_text_seq("TABLE") 6535 6536 return self.expression( 6537 exp.Predict, 6538 this=this, 6539 expression=self._parse_table(), 6540 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6541 ) 6542 6543 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6544 args = self._parse_csv(self._parse_table) 6545 return exp.JoinHint(this=func_name.upper(), expressions=args) 6546 6547 def _parse_substring(self) -> exp.Substring: 6548 # Postgres supports the form: substring(string [from int] [for int]) 6549 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6550 6551 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6552 6553 if self._match(TokenType.FROM): 6554 args.append(self._parse_bitwise()) 6555 if self._match(TokenType.FOR): 6556 if len(args) == 1: 6557 args.append(exp.Literal.number(1)) 6558 args.append(self._parse_bitwise()) 6559 6560 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6561 6562 def _parse_trim(self) -> exp.Trim: 6563 # https://www.w3resource.com/sql/character-functions/trim.php 6564 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6565 6566 position = None 6567 collation = None 6568 expression = None 6569 6570 if self._match_texts(self.TRIM_TYPES): 6571 position = self._prev.text.upper() 6572 6573 this = self._parse_bitwise() 6574 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6575 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6576 expression = self._parse_bitwise() 6577 6578 if invert_order: 6579 this, expression = expression, this 6580 6581 if self._match(TokenType.COLLATE): 6582 collation = self._parse_bitwise() 6583 6584 return self.expression( 6585 exp.Trim, this=this, position=position, expression=expression, collation=collation 6586 ) 6587 6588 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6589 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6590 6591 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6592 return self._parse_window(self._parse_id_var(), alias=True) 6593 6594 def _parse_respect_or_ignore_nulls( 6595 self, this: t.Optional[exp.Expression] 6596 ) -> t.Optional[exp.Expression]: 6597 if self._match_text_seq("IGNORE", "NULLS"): 
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER;
        # some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The code below handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity:
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # BigQuery's named windows (SELECT ... FROM t WINDOW x AS (PARTITION BY ...))
        # reach this point via _parse_named_window with alias=True
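        # A hedged round-trip sketch of that named-window shape (BigQuery dialect assumed):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one(
        #   ...     "SELECT ROW_NUMBER() OVER w FROM t WINDOW w AS (PARTITION BY a)",
        #   ...     read="bigquery",
        #   ... ).sql("bigquery")  # expected to round-trip unchanged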
6656 if alias: 6657 over = None 6658 self._match(TokenType.ALIAS) 6659 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6660 return this 6661 else: 6662 over = self._prev.text.upper() 6663 6664 if comments and isinstance(func, exp.Expression): 6665 func.pop_comments() 6666 6667 if not self._match(TokenType.L_PAREN): 6668 return self.expression( 6669 exp.Window, 6670 comments=comments, 6671 this=this, 6672 alias=self._parse_id_var(False), 6673 over=over, 6674 ) 6675 6676 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6677 6678 first = self._match(TokenType.FIRST) 6679 if self._match_text_seq("LAST"): 6680 first = False 6681 6682 partition, order = self._parse_partition_and_order() 6683 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6684 6685 if kind: 6686 self._match(TokenType.BETWEEN) 6687 start = self._parse_window_spec() 6688 self._match(TokenType.AND) 6689 end = self._parse_window_spec() 6690 6691 spec = self.expression( 6692 exp.WindowSpec, 6693 kind=kind, 6694 start=start["value"], 6695 start_side=start["side"], 6696 end=end["value"], 6697 end_side=end["side"], 6698 ) 6699 else: 6700 spec = None 6701 6702 self._match_r_paren() 6703 6704 window = self.expression( 6705 exp.Window, 6706 comments=comments, 6707 this=this, 6708 partition_by=partition, 6709 order=order, 6710 spec=spec, 6711 alias=window_alias, 6712 over=over, 6713 first=first, 6714 ) 6715 6716 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6717 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6718 return self._parse_window(window, alias=alias) 6719 6720 return window 6721 6722 def _parse_partition_and_order( 6723 self, 6724 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6725 return self._parse_partition_by(), self._parse_order() 6726 6727 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6728 self._match(TokenType.BETWEEN) 6729 6730 return { 6731 "value": ( 6732 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6733 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6734 or self._parse_bitwise() 6735 ), 6736 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6737 } 6738 6739 def _parse_alias( 6740 self, this: t.Optional[exp.Expression], explicit: bool = False 6741 ) -> t.Optional[exp.Expression]: 6742 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 6743 # so this section tries to parse the clause version and if it fails, it treats the token 6744 # as an identifier (alias) 6745 if self._can_parse_limit_or_offset(): 6746 return this 6747 6748 any_token = self._match(TokenType.ALIAS) 6749 comments = self._prev_comments or [] 6750 6751 if explicit and not any_token: 6752 return this 6753 6754 if self._match(TokenType.L_PAREN): 6755 aliases = self.expression( 6756 exp.Aliases, 6757 comments=comments, 6758 this=this, 6759 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6760 ) 6761 self._match_r_paren(aliases) 6762 return aliases 6763 6764 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6765 self.STRING_ALIASES and self._parse_string_as_identifier() 6766 ) 6767 6768 if alias: 6769 comments.extend(alias.pop_comments()) 6770 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6771 column = this.this 6772 6773 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6774 if not this.comments and column and 
column.comments: 6775 this.comments = column.pop_comments() 6776 6777 return this 6778 6779 def _parse_id_var( 6780 self, 6781 any_token: bool = True, 6782 tokens: t.Optional[t.Collection[TokenType]] = None, 6783 ) -> t.Optional[exp.Expression]: 6784 expression = self._parse_identifier() 6785 if not expression and ( 6786 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6787 ): 6788 quoted = self._prev.token_type == TokenType.STRING 6789 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6790 6791 return expression 6792 6793 def _parse_string(self) -> t.Optional[exp.Expression]: 6794 if self._match_set(self.STRING_PARSERS): 6795 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6796 return self._parse_placeholder() 6797 6798 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6799 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6800 6801 def _parse_number(self) -> t.Optional[exp.Expression]: 6802 if self._match_set(self.NUMERIC_PARSERS): 6803 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6804 return self._parse_placeholder() 6805 6806 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6807 if self._match(TokenType.IDENTIFIER): 6808 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6809 return self._parse_placeholder() 6810 6811 def _parse_var( 6812 self, 6813 any_token: bool = False, 6814 tokens: t.Optional[t.Collection[TokenType]] = None, 6815 upper: bool = False, 6816 ) -> t.Optional[exp.Expression]: 6817 if ( 6818 (any_token and self._advance_any()) 6819 or self._match(TokenType.VAR) 6820 or (self._match_set(tokens) if tokens else False) 6821 ): 6822 return self.expression( 6823 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6824 ) 6825 return self._parse_placeholder() 6826 6827 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6828 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6829 self._advance() 6830 return self._prev 6831 return None 6832 6833 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6834 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6835 6836 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6837 return self._parse_primary() or self._parse_var(any_token=True) 6838 6839 def _parse_null(self) -> t.Optional[exp.Expression]: 6840 if self._match_set(self.NULL_TOKENS): 6841 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6842 return self._parse_placeholder() 6843 6844 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6845 if self._match(TokenType.TRUE): 6846 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6847 if self._match(TokenType.FALSE): 6848 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6849 return self._parse_placeholder() 6850 6851 def _parse_star(self) -> t.Optional[exp.Expression]: 6852 if self._match(TokenType.STAR): 6853 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6854 return self._parse_placeholder() 6855 6856 def _parse_parameter(self) -> exp.Parameter: 6857 this = self._parse_identifier() or self._parse_primary_or_var() 6858 return self.expression(exp.Parameter, this=this) 6859 6860 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6861 if self._match_set(self.PLACEHOLDER_PARSERS): 6862 placeholder = 
self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6863 if placeholder: 6864 return placeholder 6865 self._advance(-1) 6866 return None 6867 6868 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6869 if not self._match_texts(keywords): 6870 return None 6871 if self._match(TokenType.L_PAREN, advance=False): 6872 return self._parse_wrapped_csv(self._parse_expression) 6873 6874 expression = self._parse_expression() 6875 return [expression] if expression else None 6876 6877 def _parse_csv( 6878 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6879 ) -> t.List[exp.Expression]: 6880 parse_result = parse_method() 6881 items = [parse_result] if parse_result is not None else [] 6882 6883 while self._match(sep): 6884 self._add_comments(parse_result) 6885 parse_result = parse_method() 6886 if parse_result is not None: 6887 items.append(parse_result) 6888 6889 return items 6890 6891 def _parse_tokens( 6892 self, parse_method: t.Callable, expressions: t.Dict 6893 ) -> t.Optional[exp.Expression]: 6894 this = parse_method() 6895 6896 while self._match_set(expressions): 6897 this = self.expression( 6898 expressions[self._prev.token_type], 6899 this=this, 6900 comments=self._prev_comments, 6901 expression=parse_method(), 6902 ) 6903 6904 return this 6905 6906 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6907 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6908 6909 def _parse_wrapped_csv( 6910 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6911 ) -> t.List[exp.Expression]: 6912 return self._parse_wrapped( 6913 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6914 ) 6915 6916 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6917 wrapped = self._match(TokenType.L_PAREN) 6918 if not wrapped and not optional: 6919 self.raise_error("Expecting (") 6920 parse_result = parse_method() 6921 if wrapped: 6922 self._match_r_paren() 6923 return parse_result 6924 6925 def _parse_expressions(self) -> t.List[exp.Expression]: 6926 return self._parse_csv(self._parse_expression) 6927 6928 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6929 return self._parse_select() or self._parse_set_operations( 6930 self._parse_alias(self._parse_assignment(), explicit=True) 6931 if alias 6932 else self._parse_assignment() 6933 ) 6934 6935 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6936 return self._parse_query_modifiers( 6937 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6938 ) 6939 6940 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6941 this = None 6942 if self._match_texts(self.TRANSACTION_KIND): 6943 this = self._prev.text 6944 6945 self._match_texts(("TRANSACTION", "WORK")) 6946 6947 modes = [] 6948 while True: 6949 mode = [] 6950 while self._match(TokenType.VAR): 6951 mode.append(self._prev.text) 6952 6953 if mode: 6954 modes.append(" ".join(mode)) 6955 if not self._match(TokenType.COMMA): 6956 break 6957 6958 return self.expression(exp.Transaction, this=this, modes=modes) 6959 6960 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6961 chain = None 6962 savepoint = None 6963 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6964 6965 self._match_texts(("TRANSACTION", "WORK")) 6966 6967 if self._match_text_seq("TO"): 6968 self._match_text_seq("SAVEPOINT") 6969 savepoint = self._parse_id_var() 
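        # For example (a hedged sketch, default dialect): "ROLLBACK TO SAVEPOINT s1"
        # lands here with the savepoint captured, while "COMMIT AND CHAIN" takes the
        # branch below with chain=True.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("ROLLBACK TO SAVEPOINT s1")  # -> exp.Rollback
        #   >>> sqlglot.parse_one("COMMIT AND CHAIN")          # -> exp.Commit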
6970 6971 if self._match(TokenType.AND): 6972 chain = not self._match_text_seq("NO") 6973 self._match_text_seq("CHAIN") 6974 6975 if is_rollback: 6976 return self.expression(exp.Rollback, savepoint=savepoint) 6977 6978 return self.expression(exp.Commit, chain=chain) 6979 6980 def _parse_refresh(self) -> exp.Refresh: 6981 self._match(TokenType.TABLE) 6982 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6983 6984 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6985 if not self._match_text_seq("ADD"): 6986 return None 6987 6988 self._match(TokenType.COLUMN) 6989 exists_column = self._parse_exists(not_=True) 6990 expression = self._parse_field_def() 6991 6992 if expression: 6993 expression.set("exists", exists_column) 6994 6995 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6996 if self._match_texts(("FIRST", "AFTER")): 6997 position = self._prev.text 6998 column_position = self.expression( 6999 exp.ColumnPosition, this=self._parse_column(), position=position 7000 ) 7001 expression.set("position", column_position) 7002 7003 return expression 7004 7005 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7006 drop = self._match(TokenType.DROP) and self._parse_drop() 7007 if drop and not isinstance(drop, exp.Command): 7008 drop.set("kind", drop.args.get("kind", "COLUMN")) 7009 return drop 7010 7011 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7012 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7013 return self.expression( 7014 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7015 ) 7016 7017 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7018 index = self._index - 1 7019 7020 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7021 return self._parse_csv( 7022 lambda: self.expression( 7023 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7024 ) 7025 ) 7026 7027 self._retreat(index) 7028 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 7029 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 7030 7031 if self._match_text_seq("ADD", "COLUMNS"): 7032 schema = self._parse_schema() 7033 if schema: 7034 return [schema] 7035 return [] 7036 7037 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 7038 7039 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7040 if self._match_texts(self.ALTER_ALTER_PARSERS): 7041 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7042 7043 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7044 # keyword after ALTER we default to parsing this statement 7045 self._match(TokenType.COLUMN) 7046 column = self._parse_field(any_token=True) 7047 7048 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7049 return self.expression(exp.AlterColumn, this=column, drop=True) 7050 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7051 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7052 if self._match(TokenType.COMMENT): 7053 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7054 if self._match_text_seq("DROP", "NOT", "NULL"): 7055 return self.expression( 7056 exp.AlterColumn, 7057 this=column, 7058 drop=True, 7059 allow_null=True, 7060 ) 7061 if self._match_text_seq("SET", "NOT", "NULL"): 7062 return self.expression( 7063 
exp.AlterColumn, 7064 this=column, 7065 allow_null=False, 7066 ) 7067 self._match_text_seq("SET", "DATA") 7068 self._match_text_seq("TYPE") 7069 return self.expression( 7070 exp.AlterColumn, 7071 this=column, 7072 dtype=self._parse_types(), 7073 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7074 using=self._match(TokenType.USING) and self._parse_assignment(), 7075 ) 7076 7077 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7078 if self._match_texts(("ALL", "EVEN", "AUTO")): 7079 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7080 7081 self._match_text_seq("KEY", "DISTKEY") 7082 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7083 7084 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7085 if compound: 7086 self._match_text_seq("SORTKEY") 7087 7088 if self._match(TokenType.L_PAREN, advance=False): 7089 return self.expression( 7090 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7091 ) 7092 7093 self._match_texts(("AUTO", "NONE")) 7094 return self.expression( 7095 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7096 ) 7097 7098 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7099 index = self._index - 1 7100 7101 partition_exists = self._parse_exists() 7102 if self._match(TokenType.PARTITION, advance=False): 7103 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7104 7105 self._retreat(index) 7106 return self._parse_csv(self._parse_drop_column) 7107 7108 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7109 if self._match(TokenType.COLUMN): 7110 exists = self._parse_exists() 7111 old_column = self._parse_column() 7112 to = self._match_text_seq("TO") 7113 new_column = self._parse_column() 7114 7115 if old_column is None or to is None or new_column is None: 7116 return None 7117 7118 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7119 7120 self._match_text_seq("TO") 7121 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7122 7123 def _parse_alter_table_set(self) -> exp.AlterSet: 7124 alter_set = self.expression(exp.AlterSet) 7125 7126 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7127 "TABLE", "PROPERTIES" 7128 ): 7129 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7130 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7131 alter_set.set("expressions", [self._parse_assignment()]) 7132 elif self._match_texts(("LOGGED", "UNLOGGED")): 7133 alter_set.set("option", exp.var(self._prev.text.upper())) 7134 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7135 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7136 elif self._match_text_seq("LOCATION"): 7137 alter_set.set("location", self._parse_field()) 7138 elif self._match_text_seq("ACCESS", "METHOD"): 7139 alter_set.set("access_method", self._parse_field()) 7140 elif self._match_text_seq("TABLESPACE"): 7141 alter_set.set("tablespace", self._parse_field()) 7142 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7143 alter_set.set("file_format", [self._parse_field()]) 7144 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7145 alter_set.set("file_format", self._parse_wrapped_options()) 7146 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7147 alter_set.set("copy_options", 
self._parse_wrapped_options()) 7148 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7149 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7150 else: 7151 if self._match_text_seq("SERDE"): 7152 alter_set.set("serde", self._parse_field()) 7153 7154 alter_set.set("expressions", [self._parse_properties()]) 7155 7156 return alter_set 7157 7158 def _parse_alter(self) -> exp.Alter | exp.Command: 7159 start = self._prev 7160 7161 alter_token = self._match_set(self.ALTERABLES) and self._prev 7162 if not alter_token: 7163 return self._parse_as_command(start) 7164 7165 exists = self._parse_exists() 7166 only = self._match_text_seq("ONLY") 7167 this = self._parse_table(schema=True) 7168 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7169 7170 if self._next: 7171 self._advance() 7172 7173 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7174 if parser: 7175 actions = ensure_list(parser(self)) 7176 not_valid = self._match_text_seq("NOT", "VALID") 7177 options = self._parse_csv(self._parse_property) 7178 7179 if not self._curr and actions: 7180 return self.expression( 7181 exp.Alter, 7182 this=this, 7183 kind=alter_token.text.upper(), 7184 exists=exists, 7185 actions=actions, 7186 only=only, 7187 options=options, 7188 cluster=cluster, 7189 not_valid=not_valid, 7190 ) 7191 7192 return self._parse_as_command(start) 7193 7194 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7195 start = self._prev 7196 # https://duckdb.org/docs/sql/statements/analyze 7197 if not self._curr: 7198 return self.expression(exp.Analyze) 7199 7200 options = [] 7201 while self._match_texts(self.ANALYZE_STYLES): 7202 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7203 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7204 else: 7205 options.append(self._prev.text.upper()) 7206 7207 this: t.Optional[exp.Expression] = None 7208 inner_expression: t.Optional[exp.Expression] = None 7209 7210 kind = self._curr and self._curr.text.upper() 7211 7212 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7213 this = self._parse_table_parts() 7214 elif self._match_text_seq("TABLES"): 7215 if self._match_set((TokenType.FROM, TokenType.IN)): 7216 kind = f"{kind} {self._prev.text.upper()}" 7217 this = self._parse_table(schema=True, is_db_reference=True) 7218 elif self._match_text_seq("DATABASE"): 7219 this = self._parse_table(schema=True, is_db_reference=True) 7220 elif self._match_text_seq("CLUSTER"): 7221 this = self._parse_table() 7222 # Try matching inner expr keywords before fallback to parse table. 
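        # (A hedged illustration: keywords registered in ANALYZE_EXPRESSION_PARSERS are
        # consumed here so they are not mistaken for table names, whereas a bare
        # statement such as Presto's "ANALYZE tbl" falls through to the final branch
        # below, yielding an exp.Analyze with no kind.)
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("ANALYZE tbl", read="presto")  # -> exp.Analyze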
7223 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7224 kind = None 7225 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7226 else: 7227 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7228 kind = None 7229 this = self._parse_table_parts() 7230 7231 partition = self._try_parse(self._parse_partition) 7232 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7233 return self._parse_as_command(start) 7234 7235 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7236 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7237 "WITH", "ASYNC", "MODE" 7238 ): 7239 mode = f"WITH {self._tokens[self._index-2].text.upper()} MODE" 7240 else: 7241 mode = None 7242 7243 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7244 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7245 7246 properties = self._parse_properties() 7247 return self.expression( 7248 exp.Analyze, 7249 kind=kind, 7250 this=this, 7251 mode=mode, 7252 partition=partition, 7253 properties=properties, 7254 expression=inner_expression, 7255 options=options, 7256 ) 7257 7258 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7259 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7260 this = None 7261 kind = self._prev.text.upper() 7262 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7263 expressions = [] 7264 7265 if not self._match_text_seq("STATISTICS"): 7266 self.raise_error("Expecting token STATISTICS") 7267 7268 if self._match_text_seq("NOSCAN"): 7269 this = "NOSCAN" 7270 elif self._match(TokenType.FOR): 7271 if self._match_text_seq("ALL", "COLUMNS"): 7272 this = "FOR ALL COLUMNS" 7273 if self._match_texts("COLUMNS"): 7274 this = "FOR COLUMNS" 7275 expressions = self._parse_csv(self._parse_column_reference) 7276 elif self._match_text_seq("SAMPLE"): 7277 sample = self._parse_number() 7278 expressions = [ 7279 self.expression( 7280 exp.AnalyzeSample, 7281 sample=sample, 7282 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7283 ) 7284 ] 7285 7286 return self.expression( 7287 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7288 ) 7289 7290 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7291 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7292 kind = None 7293 this = None 7294 expression: t.Optional[exp.Expression] = None 7295 if self._match_text_seq("REF", "UPDATE"): 7296 kind = "REF" 7297 this = "UPDATE" 7298 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7299 this = "UPDATE SET DANGLING TO NULL" 7300 elif self._match_text_seq("STRUCTURE"): 7301 kind = "STRUCTURE" 7302 if self._match_text_seq("CASCADE", "FAST"): 7303 this = "CASCADE FAST" 7304 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7305 ("ONLINE", "OFFLINE") 7306 ): 7307 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7308 expression = self._parse_into() 7309 7310 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7311 7312 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7313 this = self._prev.text.upper() 7314 if self._match_text_seq("COLUMNS"): 7315 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7316 return None 7317 7318 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7319 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7320 if self._match_text_seq("STATISTICS"): 7321 return self.expression(exp.AnalyzeDelete, kind=kind) 7322 return None 7323 7324 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7325 if self._match_text_seq("CHAINED", "ROWS"): 7326 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7327 return None 7328 7329 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7330 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7331 this = self._prev.text.upper() 7332 expression: t.Optional[exp.Expression] = None 7333 expressions = [] 7334 update_options = None 7335 7336 if self._match_text_seq("HISTOGRAM", "ON"): 7337 expressions = self._parse_csv(self._parse_column_reference) 7338 with_expressions = [] 7339 while self._match(TokenType.WITH): 7340 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7341 if self._match_texts(("SYNC", "ASYNC")): 7342 if self._match_text_seq("MODE", advance=False): 7343 with_expressions.append(f"{self._prev.text.upper()} MODE") 7344 self._advance() 7345 else: 7346 buckets = self._parse_number() 7347 if self._match_text_seq("BUCKETS"): 7348 with_expressions.append(f"{buckets} BUCKETS") 7349 if with_expressions: 7350 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7351 7352 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7353 TokenType.UPDATE, advance=False 7354 ): 7355 update_options = self._prev.text.upper() 7356 self._advance() 7357 elif self._match_text_seq("USING", "DATA"): 7358 expression = self.expression(exp.UsingData, this=self._parse_string()) 7359 7360 return self.expression( 7361 exp.AnalyzeHistogram, 7362 this=this, 7363 expressions=expressions, 7364 expression=expression, 7365 update_options=update_options, 7366 ) 7367 7368 def _parse_merge(self) -> exp.Merge: 7369 self._match(TokenType.INTO) 7370 target = self._parse_table() 7371 7372 if target and self._match(TokenType.ALIAS, advance=False): 7373 target.set("alias", self._parse_table_alias()) 7374 7375 self._match(TokenType.USING) 7376 using = self._parse_table() 7377 7378 self._match(TokenType.ON) 7379 on = self._parse_assignment() 7380 7381 return self.expression( 7382 exp.Merge, 7383 this=target, 7384 using=using, 7385 on=on, 7386 whens=self._parse_when_matched(), 7387 returning=self._parse_returning(), 7388 ) 7389 7390 def _parse_when_matched(self) -> exp.Whens: 7391 whens = [] 7392 7393 while self._match(TokenType.WHEN): 7394 matched = not self._match(TokenType.NOT) 7395 self._match_text_seq("MATCHED") 7396 source = ( 7397 False 7398 if self._match_text_seq("BY", "TARGET") 7399 else self._match_text_seq("BY", "SOURCE") 7400 ) 7401 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7402 7403 self._match(TokenType.THEN) 7404 7405 if self._match(TokenType.INSERT): 7406 this = self._parse_star() 7407 if this: 7408 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7409 else: 7410 then = self.expression( 7411 exp.Insert, 7412 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 7413 expression=self._match_text_seq("VALUES") and self._parse_value(), 7414 ) 7415 elif self._match(TokenType.UPDATE): 7416 expressions = self._parse_star() 7417 if expressions: 7418 then = self.expression(exp.Update, expressions=expressions) 7419 else: 7420 then = self.expression( 7421 exp.Update, 7422 expressions=self._match(TokenType.SET) 7423 and 
self._parse_csv(self._parse_equality), 7424 ) 7425 elif self._match(TokenType.DELETE): 7426 then = self.expression(exp.Var, this=self._prev.text) 7427 else: 7428 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7429 7430 whens.append( 7431 self.expression( 7432 exp.When, 7433 matched=matched, 7434 source=source, 7435 condition=condition, 7436 then=then, 7437 ) 7438 ) 7439 return self.expression(exp.Whens, expressions=whens) 7440 7441 def _parse_show(self) -> t.Optional[exp.Expression]: 7442 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7443 if parser: 7444 return parser(self) 7445 return self._parse_as_command(self._prev) 7446 7447 def _parse_set_item_assignment( 7448 self, kind: t.Optional[str] = None 7449 ) -> t.Optional[exp.Expression]: 7450 index = self._index 7451 7452 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7453 return self._parse_set_transaction(global_=kind == "GLOBAL") 7454 7455 left = self._parse_primary() or self._parse_column() 7456 assignment_delimiter = self._match_texts(("=", "TO")) 7457 7458 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7459 self._retreat(index) 7460 return None 7461 7462 right = self._parse_statement() or self._parse_id_var() 7463 if isinstance(right, (exp.Column, exp.Identifier)): 7464 right = exp.var(right.name) 7465 7466 this = self.expression(exp.EQ, this=left, expression=right) 7467 return self.expression(exp.SetItem, this=this, kind=kind) 7468 7469 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7470 self._match_text_seq("TRANSACTION") 7471 characteristics = self._parse_csv( 7472 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7473 ) 7474 return self.expression( 7475 exp.SetItem, 7476 expressions=characteristics, 7477 kind="TRANSACTION", 7478 **{"global": global_}, # type: ignore 7479 ) 7480 7481 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7482 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7483 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7484 7485 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7486 index = self._index 7487 set_ = self.expression( 7488 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7489 ) 7490 7491 if self._curr: 7492 self._retreat(index) 7493 return self._parse_as_command(self._prev) 7494 7495 return set_ 7496 7497 def _parse_var_from_options( 7498 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7499 ) -> t.Optional[exp.Var]: 7500 start = self._curr 7501 if not start: 7502 return None 7503 7504 option = start.text.upper() 7505 continuations = options.get(option) 7506 7507 index = self._index 7508 self._advance() 7509 for keywords in continuations or []: 7510 if isinstance(keywords, str): 7511 keywords = (keywords,) 7512 7513 if self._match_text_seq(*keywords): 7514 option = f"{option} {' '.join(keywords)}" 7515 break 7516 else: 7517 if continuations or continuations is None: 7518 if raise_unmatched: 7519 self.raise_error(f"Unknown option {option}") 7520 7521 self._retreat(index) 7522 return None 7523 7524 return exp.var(option) 7525 7526 def _parse_as_command(self, start: Token) -> exp.Command: 7527 while self._curr: 7528 self._advance() 7529 text = self._find_sql(start, self._prev) 7530 size = len(start.text) 7531 self._warn_unsupported() 7532 return exp.Command(this=text[:size], expression=text[size:]) 7533 7534 def _parse_dict_property(self, 
    this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
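The internal statement parsers above surface through sqlglot's public API. As a brief, hedged illustration (default dialect assumed), a TRUNCATE statement handled by _parse_truncate_table comes back as a structured TruncateTable node rather than an opaque command:

# A minimal sketch, assuming the default dialect.
import sqlglot

tree = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE")
print(type(tree).__name__)        # TruncateTable
print(tree.args.get("identity"))  # RESTART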
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
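A minimal construction sketch; the dialect name and settings below are arbitrary choices, not defaults:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect up to 5 errors and raise them together at the end of parsing
parser = Parser(dialect="duckdb", error_level=ErrorLevel.RAISE, max_errors=5)

In everyday use Parser is rarely instantiated directly; the top-level sqlglot.parse and sqlglot.parse_one helpers construct the appropriate dialect's parser for you.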
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
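A short end-to-end sketch pairing the Tokenizer with parse; one syntax tree is produced per statement:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)
trees = Parser().parse(tokens, sql)

print(len(trees))      # 2
print(trees[0].sql())  # SELECT a FROM t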
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
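In practice this is most often reached through the top-level parse_one helper, whose into argument delegates to parse_into. A minimal sketch:

from sqlglot import exp, parse_one

# Succeeds only if the statement parses as the requested expression type
select_tree = parse_one("SELECT 1", into=exp.Select)

A statement that cannot be parsed into the requested type raises a ParseError whose errors carry an into_expression entry, as shown in the source above.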
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
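A hedged sketch of the WARN behavior; the malformed input below is an assumption, since leniency varies by dialect:

import logging
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

logging.basicConfig()
sql = "SELECT 1 +"  # assumed malformed enough to record an error
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql)  # errors are logged, not raised
print(parser.errors)  # the recorded ParseError instances remain inspectable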
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it immediately, depending on the chosen error level setting.
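With the default ErrorLevel.IMMEDIATE, the first error raises at once, and the structured context fields set above can be read off the exception. The incomplete input below is an assumption:

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM"  # assumed incomplete
try:
    Parser().parse(Tokenizer().tokenize(sql), sql)
except ParseError as e:
    info = e.errors[0]
    print(info["line"], info["col"], info["highlight"])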
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
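A small sketch of building a validated node directly; exp.to_identifier is a public sqlglot helper:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
col = parser.expression(exp.Column, this=exp.to_identifier("x"))
print(col.sql())  # x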
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
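A sketch of validation failing, assuming exp.Cast's to argument is mandatory; with the default ErrorLevel.IMMEDIATE the error raises immediately:

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()
try:
    parser.validate_expression(exp.Cast(this=exp.column("x")))  # no `to` set
except ParseError as e:
    print(e)  # e.g. Required keyword: 'to' missing for <class '...Cast'>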